{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "e8f6170e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import statistics\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "pd.options.display.max_columns= None\n",
    "pd.options.display.max_rows= None\n",
    "np.set_printoptions(suppress=True)\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "20c00df9",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/jinhongliu/Desktop/gitee/grade-prediction/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3444: DtypeWarning: Columns (0,19,49,59,118,129,130,131,134,135,136,139,145,146,147) have mixed types.Specify dtype option on import or set low_memory=False.\n",
      "  exec(code_obj, self.user_global_ns, self.user_ns)\n"
     ]
    }
   ],
   "source": [
    "data = pd.read_csv(\"data/accepted_2007_to_2018Q4.csv.gz\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "709b28bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>member_id</th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>sub_grade</th>\n",
       "      <th>emp_title</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>verification_status</th>\n",
       "      <th>issue_d</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>pymnt_plan</th>\n",
       "      <th>url</th>\n",
       "      <th>desc</th>\n",
       "      <th>purpose</th>\n",
       "      <th>title</th>\n",
       "      <th>zip_code</th>\n",
       "      <th>addr_state</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>earliest_cr_line</th>\n",
       "      <th>fico_range_low</th>\n",
       "      <th>fico_range_high</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>mths_since_last_delinq</th>\n",
       "      <th>mths_since_last_record</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>revol_util</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>initial_list_status</th>\n",
       "      <th>out_prncp</th>\n",
       "      <th>out_prncp_inv</th>\n",
       "      <th>total_pymnt</th>\n",
       "      <th>total_pymnt_inv</th>\n",
       "      <th>total_rec_prncp</th>\n",
       "      <th>total_rec_int</th>\n",
       "      <th>total_rec_late_fee</th>\n",
       "      <th>recoveries</th>\n",
       "      <th>collection_recovery_fee</th>\n",
       "      <th>last_pymnt_d</th>\n",
       "      <th>last_pymnt_amnt</th>\n",
       "      <th>next_pymnt_d</th>\n",
       "      <th>last_credit_pull_d</th>\n",
       "      <th>last_fico_range_high</th>\n",
       "      <th>last_fico_range_low</th>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <th>mths_since_last_major_derog</th>\n",
       "      <th>policy_code</th>\n",
       "      <th>application_type</th>\n",
       "      <th>annual_inc_joint</th>\n",
       "      <th>dti_joint</th>\n",
       "      <th>verification_status_joint</th>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <th>open_acc_6m</th>\n",
       "      <th>open_act_il</th>\n",
       "      <th>open_il_12m</th>\n",
       "      <th>open_il_24m</th>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <th>total_bal_il</th>\n",
       "      <th>il_util</th>\n",
       "      <th>open_rv_12m</th>\n",
       "      <th>open_rv_24m</th>\n",
       "      <th>max_bal_bc</th>\n",
       "      <th>all_util</th>\n",
       "      <th>total_rev_hi_lim</th>\n",
       "      <th>inq_fi</th>\n",
       "      <th>total_cu_tl</th>\n",
       "      <th>inq_last_12m</th>\n",
       "      <th>acc_open_past_24mths</th>\n",
       "      <th>avg_cur_bal</th>\n",
       "      <th>bc_open_to_buy</th>\n",
       "      <th>bc_util</th>\n",
       "      <th>chargeoff_within_12_mths</th>\n",
       "      <th>delinq_amnt</th>\n",
       "      <th>mo_sin_old_il_acct</th>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <th>mort_acc</th>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <th>mths_since_recent_bc_dlq</th>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <th>mths_since_recent_revol_delinq</th>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <th>num_bc_sats</th>\n",
       "      <th>num_bc_tl</th>\n",
       "      <th>num_il_tl</th>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <th>num_rev_accts</th>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <th>num_sats</th>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <th>tax_liens</th>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <th>total_bc_limit</th>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "      <th>revol_bal_joint</th>\n",
       "      <th>sec_app_fico_range_low</th>\n",
       "      <th>sec_app_fico_range_high</th>\n",
       "      <th>sec_app_earliest_cr_line</th>\n",
       "      <th>sec_app_inq_last_6mths</th>\n",
       "      <th>sec_app_mort_acc</th>\n",
       "      <th>sec_app_open_acc</th>\n",
       "      <th>sec_app_revol_util</th>\n",
       "      <th>sec_app_open_act_il</th>\n",
       "      <th>sec_app_num_rev_accts</th>\n",
       "      <th>sec_app_chargeoff_within_12_mths</th>\n",
       "      <th>sec_app_collections_12_mths_ex_med</th>\n",
       "      <th>sec_app_mths_since_last_major_derog</th>\n",
       "      <th>hardship_flag</th>\n",
       "      <th>hardship_type</th>\n",
       "      <th>hardship_reason</th>\n",
       "      <th>hardship_status</th>\n",
       "      <th>deferral_term</th>\n",
       "      <th>hardship_amount</th>\n",
       "      <th>hardship_start_date</th>\n",
       "      <th>hardship_end_date</th>\n",
       "      <th>payment_plan_start_date</th>\n",
       "      <th>hardship_length</th>\n",
       "      <th>hardship_dpd</th>\n",
       "      <th>hardship_loan_status</th>\n",
       "      <th>orig_projected_additional_accrued_interest</th>\n",
       "      <th>hardship_payoff_balance_amount</th>\n",
       "      <th>hardship_last_payment_amount</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>debt_settlement_flag</th>\n",
       "      <th>debt_settlement_flag_date</th>\n",
       "      <th>settlement_status</th>\n",
       "      <th>settlement_date</th>\n",
       "      <th>settlement_amount</th>\n",
       "      <th>settlement_percentage</th>\n",
       "      <th>settlement_term</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>68407277</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.99</td>\n",
       "      <td>123.03</td>\n",
       "      <td>C</td>\n",
       "      <td>C4</td>\n",
       "      <td>leadman</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Dec-2015</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>n</td>\n",
       "      <td>https://lendingclub.com/browse/loanDetail.acti...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>debt_consolidation</td>\n",
       "      <td>Debt consolidation</td>\n",
       "      <td>190xx</td>\n",
       "      <td>PA</td>\n",
       "      <td>5.91</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Aug-2003</td>\n",
       "      <td>675.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2765.0</td>\n",
       "      <td>29.7</td>\n",
       "      <td>13.0</td>\n",
       "      <td>w</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4421.723917</td>\n",
       "      <td>4421.72</td>\n",
       "      <td>3600.00</td>\n",
       "      <td>821.72</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Jan-2019</td>\n",
       "      <td>122.67</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Mar-2019</td>\n",
       "      <td>564.0</td>\n",
       "      <td>560.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Individual</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>144904.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4981.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>9300.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20701.0</td>\n",
       "      <td>1506.0</td>\n",
       "      <td>37.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>76.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>178050.0</td>\n",
       "      <td>7746.0</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>13734.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>68355089</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>11.99</td>\n",
       "      <td>820.28</td>\n",
       "      <td>C</td>\n",
       "      <td>C1</td>\n",
       "      <td>Engineer</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Dec-2015</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>n</td>\n",
       "      <td>https://lendingclub.com/browse/loanDetail.acti...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>small_business</td>\n",
       "      <td>Business</td>\n",
       "      <td>577xx</td>\n",
       "      <td>SD</td>\n",
       "      <td>16.06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Dec-1999</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21470.0</td>\n",
       "      <td>19.2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>w</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>25679.660000</td>\n",
       "      <td>25679.66</td>\n",
       "      <td>24700.00</td>\n",
       "      <td>979.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Jun-2016</td>\n",
       "      <td>926.35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Mar-2019</td>\n",
       "      <td>699.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Individual</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>204396.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>18005.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6472.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>111800.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9733.0</td>\n",
       "      <td>57830.0</td>\n",
       "      <td>27.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>192.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>7.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>314017.0</td>\n",
       "      <td>39475.0</td>\n",
       "      <td>79300.0</td>\n",
       "      <td>24667.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>68341763</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>10.78</td>\n",
       "      <td>432.66</td>\n",
       "      <td>B</td>\n",
       "      <td>B4</td>\n",
       "      <td>truck driver</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>63000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Dec-2015</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>n</td>\n",
       "      <td>https://lendingclub.com/browse/loanDetail.acti...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>home_improvement</td>\n",
       "      <td>NaN</td>\n",
       "      <td>605xx</td>\n",
       "      <td>IL</td>\n",
       "      <td>10.78</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Aug-2000</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7869.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>18.0</td>\n",
       "      <td>w</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>22705.924294</td>\n",
       "      <td>22705.92</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>2705.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Jun-2017</td>\n",
       "      <td>15813.30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Mar-2019</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Joint App</td>\n",
       "      <td>71000.0</td>\n",
       "      <td>13.85</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>189699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>10827.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2081.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>31617.0</td>\n",
       "      <td>2737.0</td>\n",
       "      <td>55.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>184.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>218418.0</td>\n",
       "      <td>18696.0</td>\n",
       "      <td>6200.0</td>\n",
       "      <td>14877.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>66310712</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>14.85</td>\n",
       "      <td>829.90</td>\n",
       "      <td>C</td>\n",
       "      <td>C5</td>\n",
       "      <td>Information Systems Officer</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Dec-2015</td>\n",
       "      <td>Current</td>\n",
       "      <td>n</td>\n",
       "      <td>https://lendingclub.com/browse/loanDetail.acti...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>debt_consolidation</td>\n",
       "      <td>Debt consolidation</td>\n",
       "      <td>076xx</td>\n",
       "      <td>NJ</td>\n",
       "      <td>17.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Sep-2008</td>\n",
       "      <td>785.0</td>\n",
       "      <td>789.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7802.0</td>\n",
       "      <td>11.6</td>\n",
       "      <td>17.0</td>\n",
       "      <td>w</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>31464.010000</td>\n",
       "      <td>31464.01</td>\n",
       "      <td>19102.35</td>\n",
       "      <td>12361.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Feb-2019</td>\n",
       "      <td>829.90</td>\n",
       "      <td>Apr-2019</td>\n",
       "      <td>Mar-2019</td>\n",
       "      <td>679.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Individual</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>301500.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>12609.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6987.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>67300.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>23192.0</td>\n",
       "      <td>54962.0</td>\n",
       "      <td>12.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>381215.0</td>\n",
       "      <td>52226.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>18000.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>68476807</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>22.45</td>\n",
       "      <td>289.91</td>\n",
       "      <td>F</td>\n",
       "      <td>F1</td>\n",
       "      <td>Contract Specialist</td>\n",
       "      <td>3 years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>104433.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Dec-2015</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>n</td>\n",
       "      <td>https://lendingclub.com/browse/loanDetail.acti...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>major_purchase</td>\n",
       "      <td>Major purchase</td>\n",
       "      <td>174xx</td>\n",
       "      <td>PA</td>\n",
       "      <td>25.37</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Jun-1998</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21929.0</td>\n",
       "      <td>64.5</td>\n",
       "      <td>35.0</td>\n",
       "      <td>w</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11740.500000</td>\n",
       "      <td>11740.50</td>\n",
       "      <td>10400.00</td>\n",
       "      <td>1340.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Jul-2016</td>\n",
       "      <td>10128.96</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Mar-2018</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Individual</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>331730.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>73839.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9702.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>34000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>27644.0</td>\n",
       "      <td>4567.0</td>\n",
       "      <td>77.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>439570.0</td>\n",
       "      <td>95768.0</td>\n",
       "      <td>20300.0</td>\n",
       "      <td>88097.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id  member_id  loan_amnt  funded_amnt  funded_amnt_inv        term  \\\n",
       "0  68407277        NaN     3600.0       3600.0           3600.0   36 months   \n",
       "1  68355089        NaN    24700.0      24700.0          24700.0   36 months   \n",
       "2  68341763        NaN    20000.0      20000.0          20000.0   60 months   \n",
       "3  66310712        NaN    35000.0      35000.0          35000.0   60 months   \n",
       "4  68476807        NaN    10400.0      10400.0          10400.0   60 months   \n",
       "\n",
       "   int_rate  installment grade sub_grade                    emp_title  \\\n",
       "0     13.99       123.03     C        C4                      leadman   \n",
       "1     11.99       820.28     C        C1                     Engineer   \n",
       "2     10.78       432.66     B        B4                 truck driver   \n",
       "3     14.85       829.90     C        C5  Information Systems Officer   \n",
       "4     22.45       289.91     F        F1          Contract Specialist   \n",
       "\n",
       "  emp_length home_ownership  annual_inc verification_status   issue_d  \\\n",
       "0  10+ years       MORTGAGE     55000.0        Not Verified  Dec-2015   \n",
       "1  10+ years       MORTGAGE     65000.0        Not Verified  Dec-2015   \n",
       "2  10+ years       MORTGAGE     63000.0        Not Verified  Dec-2015   \n",
       "3  10+ years       MORTGAGE    110000.0     Source Verified  Dec-2015   \n",
       "4    3 years       MORTGAGE    104433.0     Source Verified  Dec-2015   \n",
       "\n",
       "  loan_status pymnt_plan                                                url  \\\n",
       "0  Fully Paid          n  https://lendingclub.com/browse/loanDetail.acti...   \n",
       "1  Fully Paid          n  https://lendingclub.com/browse/loanDetail.acti...   \n",
       "2  Fully Paid          n  https://lendingclub.com/browse/loanDetail.acti...   \n",
       "3     Current          n  https://lendingclub.com/browse/loanDetail.acti...   \n",
       "4  Fully Paid          n  https://lendingclub.com/browse/loanDetail.acti...   \n",
       "\n",
       "  desc             purpose               title zip_code addr_state    dti  \\\n",
       "0  NaN  debt_consolidation  Debt consolidation    190xx         PA   5.91   \n",
       "1  NaN      small_business            Business    577xx         SD  16.06   \n",
       "2  NaN    home_improvement                 NaN    605xx         IL  10.78   \n",
       "3  NaN  debt_consolidation  Debt consolidation    076xx         NJ  17.06   \n",
       "4  NaN      major_purchase      Major purchase    174xx         PA  25.37   \n",
       "\n",
       "   delinq_2yrs earliest_cr_line  fico_range_low  fico_range_high  \\\n",
       "0          0.0         Aug-2003           675.0            679.0   \n",
       "1          1.0         Dec-1999           715.0            719.0   \n",
       "2          0.0         Aug-2000           695.0            699.0   \n",
       "3          0.0         Sep-2008           785.0            789.0   \n",
       "4          1.0         Jun-1998           695.0            699.0   \n",
       "\n",
       "   inq_last_6mths  mths_since_last_delinq  mths_since_last_record  open_acc  \\\n",
       "0             1.0                    30.0                     NaN       7.0   \n",
       "1             4.0                     6.0                     NaN      22.0   \n",
       "2             0.0                     NaN                     NaN       6.0   \n",
       "3             0.0                     NaN                     NaN      13.0   \n",
       "4             3.0                    12.0                     NaN      12.0   \n",
       "\n",
       "   pub_rec  revol_bal  revol_util  total_acc initial_list_status  out_prncp  \\\n",
       "0      0.0     2765.0        29.7       13.0                   w       0.00   \n",
       "1      0.0    21470.0        19.2       38.0                   w       0.00   \n",
       "2      0.0     7869.0        56.2       18.0                   w       0.00   \n",
       "3      0.0     7802.0        11.6       17.0                   w   15897.65   \n",
       "4      0.0    21929.0        64.5       35.0                   w       0.00   \n",
       "\n",
       "   out_prncp_inv   total_pymnt  total_pymnt_inv  total_rec_prncp  \\\n",
       "0           0.00   4421.723917          4421.72          3600.00   \n",
       "1           0.00  25679.660000         25679.66         24700.00   \n",
       "2           0.00  22705.924294         22705.92         20000.00   \n",
       "3       15897.65  31464.010000         31464.01         19102.35   \n",
       "4           0.00  11740.500000         11740.50         10400.00   \n",
       "\n",
       "   total_rec_int  total_rec_late_fee  recoveries  collection_recovery_fee  \\\n",
       "0         821.72                 0.0         0.0                      0.0   \n",
       "1         979.66                 0.0         0.0                      0.0   \n",
       "2        2705.92                 0.0         0.0                      0.0   \n",
       "3       12361.66                 0.0         0.0                      0.0   \n",
       "4        1340.50                 0.0         0.0                      0.0   \n",
       "\n",
       "  last_pymnt_d  last_pymnt_amnt next_pymnt_d last_credit_pull_d  \\\n",
       "0     Jan-2019           122.67          NaN           Mar-2019   \n",
       "1     Jun-2016           926.35          NaN           Mar-2019   \n",
       "2     Jun-2017         15813.30          NaN           Mar-2019   \n",
       "3     Feb-2019           829.90     Apr-2019           Mar-2019   \n",
       "4     Jul-2016         10128.96          NaN           Mar-2018   \n",
       "\n",
       "   last_fico_range_high  last_fico_range_low  collections_12_mths_ex_med  \\\n",
       "0                 564.0                560.0                         0.0   \n",
       "1                 699.0                695.0                         0.0   \n",
       "2                 704.0                700.0                         0.0   \n",
       "3                 679.0                675.0                         0.0   \n",
       "4                 704.0                700.0                         0.0   \n",
       "\n",
       "   mths_since_last_major_derog  policy_code application_type  \\\n",
       "0                         30.0          1.0       Individual   \n",
       "1                          NaN          1.0       Individual   \n",
       "2                          NaN          1.0        Joint App   \n",
       "3                          NaN          1.0       Individual   \n",
       "4                          NaN          1.0       Individual   \n",
       "\n",
       "   annual_inc_joint  dti_joint verification_status_joint  acc_now_delinq  \\\n",
       "0               NaN        NaN                       NaN             0.0   \n",
       "1               NaN        NaN                       NaN             0.0   \n",
       "2           71000.0      13.85              Not Verified             0.0   \n",
       "3               NaN        NaN                       NaN             0.0   \n",
       "4               NaN        NaN                       NaN             0.0   \n",
       "\n",
       "   tot_coll_amt  tot_cur_bal  open_acc_6m  open_act_il  open_il_12m  \\\n",
       "0         722.0     144904.0          2.0          2.0          0.0   \n",
       "1           0.0     204396.0          1.0          1.0          0.0   \n",
       "2           0.0     189699.0          0.0          1.0          0.0   \n",
       "3           0.0     301500.0          1.0          1.0          0.0   \n",
       "4           0.0     331730.0          1.0          3.0          0.0   \n",
       "\n",
       "   open_il_24m  mths_since_rcnt_il  total_bal_il  il_util  open_rv_12m  \\\n",
       "0          1.0                21.0        4981.0     36.0          3.0   \n",
       "1          1.0                19.0       18005.0     73.0          2.0   \n",
       "2          4.0                19.0       10827.0     73.0          0.0   \n",
       "3          1.0                23.0       12609.0     70.0          1.0   \n",
       "4          3.0                14.0       73839.0     84.0          4.0   \n",
       "\n",
       "   open_rv_24m  max_bal_bc  all_util  total_rev_hi_lim  inq_fi  total_cu_tl  \\\n",
       "0          3.0       722.0      34.0            9300.0     3.0          1.0   \n",
       "1          3.0      6472.0      29.0          111800.0     0.0          0.0   \n",
       "2          2.0      2081.0      65.0           14000.0     2.0          5.0   \n",
       "3          1.0      6987.0      45.0           67300.0     0.0          1.0   \n",
       "4          7.0      9702.0      78.0           34000.0     2.0          1.0   \n",
       "\n",
       "   inq_last_12m  acc_open_past_24mths  avg_cur_bal  bc_open_to_buy  bc_util  \\\n",
       "0           4.0                   4.0      20701.0          1506.0     37.2   \n",
       "1           6.0                   4.0       9733.0         57830.0     27.1   \n",
       "2           1.0                   6.0      31617.0          2737.0     55.9   \n",
       "3           0.0                   2.0      23192.0         54962.0     12.1   \n",
       "4           3.0                  10.0      27644.0          4567.0     77.5   \n",
       "\n",
       "   chargeoff_within_12_mths  delinq_amnt  mo_sin_old_il_acct  \\\n",
       "0                       0.0          0.0               148.0   \n",
       "1                       0.0          0.0               113.0   \n",
       "2                       0.0          0.0               125.0   \n",
       "3                       0.0          0.0                36.0   \n",
       "4                       0.0          0.0               128.0   \n",
       "\n",
       "   mo_sin_old_rev_tl_op  mo_sin_rcnt_rev_tl_op  mo_sin_rcnt_tl  mort_acc  \\\n",
       "0                 128.0                    3.0             3.0       1.0   \n",
       "1                 192.0                    2.0             2.0       4.0   \n",
       "2                 184.0                   14.0            14.0       5.0   \n",
       "3                  87.0                    2.0             2.0       1.0   \n",
       "4                 210.0                    4.0             4.0       6.0   \n",
       "\n",
       "   mths_since_recent_bc  mths_since_recent_bc_dlq  mths_since_recent_inq  \\\n",
       "0                   4.0                      69.0                    4.0   \n",
       "1                   2.0                       NaN                    0.0   \n",
       "2                 101.0                       NaN                   10.0   \n",
       "3                   2.0                       NaN                    NaN   \n",
       "4                   4.0                      12.0                    1.0   \n",
       "\n",
       "   mths_since_recent_revol_delinq  num_accts_ever_120_pd  num_actv_bc_tl  \\\n",
       "0                            69.0                    2.0             2.0   \n",
       "1                             6.0                    0.0             5.0   \n",
       "2                             NaN                    0.0             2.0   \n",
       "3                             NaN                    0.0             4.0   \n",
       "4                            12.0                    0.0             4.0   \n",
       "\n",
       "   num_actv_rev_tl  num_bc_sats  num_bc_tl  num_il_tl  num_op_rev_tl  \\\n",
       "0              4.0          2.0        5.0        3.0            4.0   \n",
       "1              5.0         13.0       17.0        6.0           20.0   \n",
       "2              3.0          2.0        4.0        6.0            4.0   \n",
       "3              5.0          8.0       10.0        2.0           10.0   \n",
       "4              6.0          5.0        9.0       10.0            7.0   \n",
       "\n",
       "   num_rev_accts  num_rev_tl_bal_gt_0  num_sats  num_tl_120dpd_2m  \\\n",
       "0            9.0                  4.0       7.0               0.0   \n",
       "1           27.0                  5.0      22.0               0.0   \n",
       "2            7.0                  3.0       6.0               0.0   \n",
       "3           13.0                  5.0      13.0               0.0   \n",
       "4           19.0                  6.0      12.0               0.0   \n",
       "\n",
       "   num_tl_30dpd  num_tl_90g_dpd_24m  num_tl_op_past_12m  pct_tl_nvr_dlq  \\\n",
       "0           0.0                 0.0                 3.0            76.9   \n",
       "1           0.0                 0.0                 2.0            97.4   \n",
       "2           0.0                 0.0                 0.0           100.0   \n",
       "3           0.0                 0.0                 1.0           100.0   \n",
       "4           0.0                 0.0                 4.0            96.6   \n",
       "\n",
       "   percent_bc_gt_75  pub_rec_bankruptcies  tax_liens  tot_hi_cred_lim  \\\n",
       "0               0.0                   0.0        0.0         178050.0   \n",
       "1               7.7                   0.0        0.0         314017.0   \n",
       "2              50.0                   0.0        0.0         218418.0   \n",
       "3               0.0                   0.0        0.0         381215.0   \n",
       "4              60.0                   0.0        0.0         439570.0   \n",
       "\n",
       "   total_bal_ex_mort  total_bc_limit  total_il_high_credit_limit  \\\n",
       "0             7746.0          2400.0                     13734.0   \n",
       "1            39475.0         79300.0                     24667.0   \n",
       "2            18696.0          6200.0                     14877.0   \n",
       "3            52226.0         62500.0                     18000.0   \n",
       "4            95768.0         20300.0                     88097.0   \n",
       "\n",
       "   revol_bal_joint  sec_app_fico_range_low  sec_app_fico_range_high  \\\n",
       "0              NaN                     NaN                      NaN   \n",
       "1              NaN                     NaN                      NaN   \n",
       "2              NaN                     NaN                      NaN   \n",
       "3              NaN                     NaN                      NaN   \n",
       "4              NaN                     NaN                      NaN   \n",
       "\n",
       "  sec_app_earliest_cr_line  sec_app_inq_last_6mths  sec_app_mort_acc  \\\n",
       "0                      NaN                     NaN               NaN   \n",
       "1                      NaN                     NaN               NaN   \n",
       "2                      NaN                     NaN               NaN   \n",
       "3                      NaN                     NaN               NaN   \n",
       "4                      NaN                     NaN               NaN   \n",
       "\n",
       "   sec_app_open_acc  sec_app_revol_util  sec_app_open_act_il  \\\n",
       "0               NaN                 NaN                  NaN   \n",
       "1               NaN                 NaN                  NaN   \n",
       "2               NaN                 NaN                  NaN   \n",
       "3               NaN                 NaN                  NaN   \n",
       "4               NaN                 NaN                  NaN   \n",
       "\n",
       "   sec_app_num_rev_accts  sec_app_chargeoff_within_12_mths  \\\n",
       "0                    NaN                               NaN   \n",
       "1                    NaN                               NaN   \n",
       "2                    NaN                               NaN   \n",
       "3                    NaN                               NaN   \n",
       "4                    NaN                               NaN   \n",
       "\n",
       "   sec_app_collections_12_mths_ex_med  sec_app_mths_since_last_major_derog  \\\n",
       "0                                 NaN                                  NaN   \n",
       "1                                 NaN                                  NaN   \n",
       "2                                 NaN                                  NaN   \n",
       "3                                 NaN                                  NaN   \n",
       "4                                 NaN                                  NaN   \n",
       "\n",
       "  hardship_flag hardship_type hardship_reason hardship_status  deferral_term  \\\n",
       "0             N           NaN             NaN             NaN            NaN   \n",
       "1             N           NaN             NaN             NaN            NaN   \n",
       "2             N           NaN             NaN             NaN            NaN   \n",
       "3             N           NaN             NaN             NaN            NaN   \n",
       "4             N           NaN             NaN             NaN            NaN   \n",
       "\n",
       "   hardship_amount hardship_start_date hardship_end_date  \\\n",
       "0              NaN                 NaN               NaN   \n",
       "1              NaN                 NaN               NaN   \n",
       "2              NaN                 NaN               NaN   \n",
       "3              NaN                 NaN               NaN   \n",
       "4              NaN                 NaN               NaN   \n",
       "\n",
       "  payment_plan_start_date  hardship_length  hardship_dpd hardship_loan_status  \\\n",
       "0                     NaN              NaN           NaN                  NaN   \n",
       "1                     NaN              NaN           NaN                  NaN   \n",
       "2                     NaN              NaN           NaN                  NaN   \n",
       "3                     NaN              NaN           NaN                  NaN   \n",
       "4                     NaN              NaN           NaN                  NaN   \n",
       "\n",
       "   orig_projected_additional_accrued_interest  hardship_payoff_balance_amount  \\\n",
       "0                                         NaN                             NaN   \n",
       "1                                         NaN                             NaN   \n",
       "2                                         NaN                             NaN   \n",
       "3                                         NaN                             NaN   \n",
       "4                                         NaN                             NaN   \n",
       "\n",
       "   hardship_last_payment_amount disbursement_method debt_settlement_flag  \\\n",
       "0                           NaN                Cash                    N   \n",
       "1                           NaN                Cash                    N   \n",
       "2                           NaN                Cash                    N   \n",
       "3                           NaN                Cash                    N   \n",
       "4                           NaN                Cash                    N   \n",
       "\n",
       "  debt_settlement_flag_date settlement_status settlement_date  \\\n",
       "0                       NaN               NaN             NaN   \n",
       "1                       NaN               NaN             NaN   \n",
       "2                       NaN               NaN             NaN   \n",
       "3                       NaN               NaN             NaN   \n",
       "4                       NaN               NaN             NaN   \n",
       "\n",
       "   settlement_amount  settlement_percentage  settlement_term  \n",
       "0                NaN                    NaN              NaN  \n",
       "1                NaN                    NaN              NaN  \n",
       "2                NaN                    NaN              NaN  \n",
       "3                NaN                    NaN              NaN  \n",
       "4                NaN                    NaN              NaN  "
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "1f9bc6ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2260701 entries, 0 to 2260700\n",
      "Data columns (total 151 columns):\n",
      " #    Column                                      Dtype  \n",
      "---   ------                                      -----  \n",
      " 0    id                                          object \n",
      " 1    member_id                                   float64\n",
      " 2    loan_amnt                                   float64\n",
      " 3    funded_amnt                                 float64\n",
      " 4    funded_amnt_inv                             float64\n",
      " 5    term                                        object \n",
      " 6    int_rate                                    float64\n",
      " 7    installment                                 float64\n",
      " 8    grade                                       object \n",
      " 9    sub_grade                                   object \n",
      " 10   emp_title                                   object \n",
      " 11   emp_length                                  object \n",
      " 12   home_ownership                              object \n",
      " 13   annual_inc                                  float64\n",
      " 14   verification_status                         object \n",
      " 15   issue_d                                     object \n",
      " 16   loan_status                                 object \n",
      " 17   pymnt_plan                                  object \n",
      " 18   url                                         object \n",
      " 19   desc                                        object \n",
      " 20   purpose                                     object \n",
      " 21   title                                       object \n",
      " 22   zip_code                                    object \n",
      " 23   addr_state                                  object \n",
      " 24   dti                                         float64\n",
      " 25   delinq_2yrs                                 float64\n",
      " 26   earliest_cr_line                            object \n",
      " 27   fico_range_low                              float64\n",
      " 28   fico_range_high                             float64\n",
      " 29   inq_last_6mths                              float64\n",
      " 30   mths_since_last_delinq                      float64\n",
      " 31   mths_since_last_record                      float64\n",
      " 32   open_acc                                    float64\n",
      " 33   pub_rec                                     float64\n",
      " 34   revol_bal                                   float64\n",
      " 35   revol_util                                  float64\n",
      " 36   total_acc                                   float64\n",
      " 37   initial_list_status                         object \n",
      " 38   out_prncp                                   float64\n",
      " 39   out_prncp_inv                               float64\n",
      " 40   total_pymnt                                 float64\n",
      " 41   total_pymnt_inv                             float64\n",
      " 42   total_rec_prncp                             float64\n",
      " 43   total_rec_int                               float64\n",
      " 44   total_rec_late_fee                          float64\n",
      " 45   recoveries                                  float64\n",
      " 46   collection_recovery_fee                     float64\n",
      " 47   last_pymnt_d                                object \n",
      " 48   last_pymnt_amnt                             float64\n",
      " 49   next_pymnt_d                                object \n",
      " 50   last_credit_pull_d                          object \n",
      " 51   last_fico_range_high                        float64\n",
      " 52   last_fico_range_low                         float64\n",
      " 53   collections_12_mths_ex_med                  float64\n",
      " 54   mths_since_last_major_derog                 float64\n",
      " 55   policy_code                                 float64\n",
      " 56   application_type                            object \n",
      " 57   annual_inc_joint                            float64\n",
      " 58   dti_joint                                   float64\n",
      " 59   verification_status_joint                   object \n",
      " 60   acc_now_delinq                              float64\n",
      " 61   tot_coll_amt                                float64\n",
      " 62   tot_cur_bal                                 float64\n",
      " 63   open_acc_6m                                 float64\n",
      " 64   open_act_il                                 float64\n",
      " 65   open_il_12m                                 float64\n",
      " 66   open_il_24m                                 float64\n",
      " 67   mths_since_rcnt_il                          float64\n",
      " 68   total_bal_il                                float64\n",
      " 69   il_util                                     float64\n",
      " 70   open_rv_12m                                 float64\n",
      " 71   open_rv_24m                                 float64\n",
      " 72   max_bal_bc                                  float64\n",
      " 73   all_util                                    float64\n",
      " 74   total_rev_hi_lim                            float64\n",
      " 75   inq_fi                                      float64\n",
      " 76   total_cu_tl                                 float64\n",
      " 77   inq_last_12m                                float64\n",
      " 78   acc_open_past_24mths                        float64\n",
      " 79   avg_cur_bal                                 float64\n",
      " 80   bc_open_to_buy                              float64\n",
      " 81   bc_util                                     float64\n",
      " 82   chargeoff_within_12_mths                    float64\n",
      " 83   delinq_amnt                                 float64\n",
      " 84   mo_sin_old_il_acct                          float64\n",
      " 85   mo_sin_old_rev_tl_op                        float64\n",
      " 86   mo_sin_rcnt_rev_tl_op                       float64\n",
      " 87   mo_sin_rcnt_tl                              float64\n",
      " 88   mort_acc                                    float64\n",
      " 89   mths_since_recent_bc                        float64\n",
      " 90   mths_since_recent_bc_dlq                    float64\n",
      " 91   mths_since_recent_inq                       float64\n",
      " 92   mths_since_recent_revol_delinq              float64\n",
      " 93   num_accts_ever_120_pd                       float64\n",
      " 94   num_actv_bc_tl                              float64\n",
      " 95   num_actv_rev_tl                             float64\n",
      " 96   num_bc_sats                                 float64\n",
      " 97   num_bc_tl                                   float64\n",
      " 98   num_il_tl                                   float64\n",
      " 99   num_op_rev_tl                               float64\n",
      " 100  num_rev_accts                               float64\n",
      " 101  num_rev_tl_bal_gt_0                         float64\n",
      " 102  num_sats                                    float64\n",
      " 103  num_tl_120dpd_2m                            float64\n",
      " 104  num_tl_30dpd                                float64\n",
      " 105  num_tl_90g_dpd_24m                          float64\n",
      " 106  num_tl_op_past_12m                          float64\n",
      " 107  pct_tl_nvr_dlq                              float64\n",
      " 108  percent_bc_gt_75                            float64\n",
      " 109  pub_rec_bankruptcies                        float64\n",
      " 110  tax_liens                                   float64\n",
      " 111  tot_hi_cred_lim                             float64\n",
      " 112  total_bal_ex_mort                           float64\n",
      " 113  total_bc_limit                              float64\n",
      " 114  total_il_high_credit_limit                  float64\n",
      " 115  revol_bal_joint                             float64\n",
      " 116  sec_app_fico_range_low                      float64\n",
      " 117  sec_app_fico_range_high                     float64\n",
      " 118  sec_app_earliest_cr_line                    object \n",
      " 119  sec_app_inq_last_6mths                      float64\n",
      " 120  sec_app_mort_acc                            float64\n",
      " 121  sec_app_open_acc                            float64\n",
      " 122  sec_app_revol_util                          float64\n",
      " 123  sec_app_open_act_il                         float64\n",
      " 124  sec_app_num_rev_accts                       float64\n",
      " 125  sec_app_chargeoff_within_12_mths            float64\n",
      " 126  sec_app_collections_12_mths_ex_med          float64\n",
      " 127  sec_app_mths_since_last_major_derog         float64\n",
      " 128  hardship_flag                               object \n",
      " 129  hardship_type                               object \n",
      " 130  hardship_reason                             object \n",
      " 131  hardship_status                             object \n",
      " 132  deferral_term                               float64\n",
      " 133  hardship_amount                             float64\n",
      " 134  hardship_start_date                         object \n",
      " 135  hardship_end_date                           object \n",
      " 136  payment_plan_start_date                     object \n",
      " 137  hardship_length                             float64\n",
      " 138  hardship_dpd                                float64\n",
      " 139  hardship_loan_status                        object \n",
      " 140  orig_projected_additional_accrued_interest  float64\n",
      " 141  hardship_payoff_balance_amount              float64\n",
      " 142  hardship_last_payment_amount                float64\n",
      " 143  disbursement_method                         object \n",
      " 144  debt_settlement_flag                        object \n",
      " 145  debt_settlement_flag_date                   object \n",
      " 146  settlement_status                           object \n",
      " 147  settlement_date                             object \n",
      " 148  settlement_amount                           float64\n",
      " 149  settlement_percentage                       float64\n",
      " 150  settlement_term                             float64\n",
      "dtypes: float64(113), object(38)\n",
      "memory usage: 2.5+ GB\n"
     ]
    }
   ],
   "source": [
    "data.info(verbose= True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "29fc1681",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>member_id</th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>fico_range_low</th>\n",
       "      <th>fico_range_high</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>mths_since_last_delinq</th>\n",
       "      <th>mths_since_last_record</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>revol_util</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>out_prncp</th>\n",
       "      <th>out_prncp_inv</th>\n",
       "      <th>total_pymnt</th>\n",
       "      <th>total_pymnt_inv</th>\n",
       "      <th>total_rec_prncp</th>\n",
       "      <th>total_rec_int</th>\n",
       "      <th>total_rec_late_fee</th>\n",
       "      <th>recoveries</th>\n",
       "      <th>collection_recovery_fee</th>\n",
       "      <th>last_pymnt_amnt</th>\n",
       "      <th>last_fico_range_high</th>\n",
       "      <th>last_fico_range_low</th>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <th>mths_since_last_major_derog</th>\n",
       "      <th>policy_code</th>\n",
       "      <th>annual_inc_joint</th>\n",
       "      <th>dti_joint</th>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <th>open_acc_6m</th>\n",
       "      <th>open_act_il</th>\n",
       "      <th>open_il_12m</th>\n",
       "      <th>open_il_24m</th>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <th>total_bal_il</th>\n",
       "      <th>il_util</th>\n",
       "      <th>open_rv_12m</th>\n",
       "      <th>open_rv_24m</th>\n",
       "      <th>max_bal_bc</th>\n",
       "      <th>all_util</th>\n",
       "      <th>total_rev_hi_lim</th>\n",
       "      <th>inq_fi</th>\n",
       "      <th>total_cu_tl</th>\n",
       "      <th>inq_last_12m</th>\n",
       "      <th>acc_open_past_24mths</th>\n",
       "      <th>avg_cur_bal</th>\n",
       "      <th>bc_open_to_buy</th>\n",
       "      <th>bc_util</th>\n",
       "      <th>chargeoff_within_12_mths</th>\n",
       "      <th>delinq_amnt</th>\n",
       "      <th>mo_sin_old_il_acct</th>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <th>mort_acc</th>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <th>mths_since_recent_bc_dlq</th>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <th>mths_since_recent_revol_delinq</th>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <th>num_bc_sats</th>\n",
       "      <th>num_bc_tl</th>\n",
       "      <th>num_il_tl</th>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <th>num_rev_accts</th>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <th>num_sats</th>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <th>tax_liens</th>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <th>total_bc_limit</th>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "      <th>revol_bal_joint</th>\n",
       "      <th>sec_app_fico_range_low</th>\n",
       "      <th>sec_app_fico_range_high</th>\n",
       "      <th>sec_app_inq_last_6mths</th>\n",
       "      <th>sec_app_mort_acc</th>\n",
       "      <th>sec_app_open_acc</th>\n",
       "      <th>sec_app_revol_util</th>\n",
       "      <th>sec_app_open_act_il</th>\n",
       "      <th>sec_app_num_rev_accts</th>\n",
       "      <th>sec_app_chargeoff_within_12_mths</th>\n",
       "      <th>sec_app_collections_12_mths_ex_med</th>\n",
       "      <th>sec_app_mths_since_last_major_derog</th>\n",
       "      <th>deferral_term</th>\n",
       "      <th>hardship_amount</th>\n",
       "      <th>hardship_length</th>\n",
       "      <th>hardship_dpd</th>\n",
       "      <th>orig_projected_additional_accrued_interest</th>\n",
       "      <th>hardship_payoff_balance_amount</th>\n",
       "      <th>hardship_last_payment_amount</th>\n",
       "      <th>settlement_amount</th>\n",
       "      <th>settlement_percentage</th>\n",
       "      <th>settlement_term</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260664e+06</td>\n",
       "      <td>2.258957e+06</td>\n",
       "      <td>2.260639e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260638e+06</td>\n",
       "      <td>1.102166e+06</td>\n",
       "      <td>359156.000000</td>\n",
       "      <td>2.260639e+06</td>\n",
       "      <td>2.260639e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.258866e+06</td>\n",
       "      <td>2.260639e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260668e+06</td>\n",
       "      <td>2.260523e+06</td>\n",
       "      <td>580775.000000</td>\n",
       "      <td>2260668.0</td>\n",
       "      <td>1.207100e+05</td>\n",
       "      <td>120706.000000</td>\n",
       "      <td>2.260639e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>1.394538e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.350744e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.191818e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.394320e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>1.394539e+06</td>\n",
       "      <td>1.394538e+06</td>\n",
       "      <td>1.394538e+06</td>\n",
       "      <td>2.210638e+06</td>\n",
       "      <td>2.190322e+06</td>\n",
       "      <td>2.185733e+06</td>\n",
       "      <td>2.184597e+06</td>\n",
       "      <td>2.260523e+06</td>\n",
       "      <td>2.260639e+06</td>\n",
       "      <td>2.121597e+06</td>\n",
       "      <td>2.190391e+06</td>\n",
       "      <td>2.190391e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.210638e+06</td>\n",
       "      <td>2.187256e+06</td>\n",
       "      <td>519701.000000</td>\n",
       "      <td>1.965233e+06</td>\n",
       "      <td>740359.000000</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.202078e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190391e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.202078e+06</td>\n",
       "      <td>2.107011e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.190237e+06</td>\n",
       "      <td>2.185289e+06</td>\n",
       "      <td>2.259303e+06</td>\n",
       "      <td>2.260563e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>2.210638e+06</td>\n",
       "      <td>2.210638e+06</td>\n",
       "      <td>2.190392e+06</td>\n",
       "      <td>1.080200e+05</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>106184.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>108021.000000</td>\n",
       "      <td>35942.000000</td>\n",
       "      <td>10917.0</td>\n",
       "      <td>10917.000000</td>\n",
       "      <td>10917.0</td>\n",
       "      <td>10917.000000</td>\n",
       "      <td>8651.000000</td>\n",
       "      <td>10917.000000</td>\n",
       "      <td>10917.000000</td>\n",
       "      <td>34246.000000</td>\n",
       "      <td>34246.000000</td>\n",
       "      <td>34246.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.504693e+04</td>\n",
       "      <td>1.504166e+04</td>\n",
       "      <td>1.502344e+04</td>\n",
       "      <td>1.309283e+01</td>\n",
       "      <td>4.458068e+02</td>\n",
       "      <td>7.799243e+04</td>\n",
       "      <td>1.882420e+01</td>\n",
       "      <td>3.068792e-01</td>\n",
       "      <td>6.985882e+02</td>\n",
       "      <td>7.025884e+02</td>\n",
       "      <td>5.768354e-01</td>\n",
       "      <td>3.454092e+01</td>\n",
       "      <td>72.312842</td>\n",
       "      <td>1.161240e+01</td>\n",
       "      <td>1.975278e-01</td>\n",
       "      <td>1.665846e+04</td>\n",
       "      <td>5.033770e+01</td>\n",
       "      <td>2.416255e+01</td>\n",
       "      <td>4.206891e+03</td>\n",
       "      <td>4.205965e+03</td>\n",
       "      <td>1.208256e+04</td>\n",
       "      <td>1.206439e+04</td>\n",
       "      <td>9.505772e+03</td>\n",
       "      <td>2.431388e+03</td>\n",
       "      <td>1.518453e+00</td>\n",
       "      <td>1.438791e+02</td>\n",
       "      <td>2.398257e+01</td>\n",
       "      <td>3.429346e+03</td>\n",
       "      <td>6.876610e+02</td>\n",
       "      <td>6.755397e+02</td>\n",
       "      <td>1.814580e-02</td>\n",
       "      <td>44.164220</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.236246e+05</td>\n",
       "      <td>19.251817</td>\n",
       "      <td>4.147942e-03</td>\n",
       "      <td>2.327317e+02</td>\n",
       "      <td>1.424922e+05</td>\n",
       "      <td>9.344199e-01</td>\n",
       "      <td>2.779407e+00</td>\n",
       "      <td>6.764314e-01</td>\n",
       "      <td>1.562752e+00</td>\n",
       "      <td>2.122236e+01</td>\n",
       "      <td>3.550665e+04</td>\n",
       "      <td>6.914098e+01</td>\n",
       "      <td>1.290133e+00</td>\n",
       "      <td>2.749923e+00</td>\n",
       "      <td>5.806393e+03</td>\n",
       "      <td>5.703230e+01</td>\n",
       "      <td>3.457394e+04</td>\n",
       "      <td>1.012867e+00</td>\n",
       "      <td>1.477304e+00</td>\n",
       "      <td>2.036667e+00</td>\n",
       "      <td>4.521656e+00</td>\n",
       "      <td>1.354780e+04</td>\n",
       "      <td>1.139426e+04</td>\n",
       "      <td>5.789995e+01</td>\n",
       "      <td>8.464413e-03</td>\n",
       "      <td>1.236983e+01</td>\n",
       "      <td>1.257378e+02</td>\n",
       "      <td>1.814916e+02</td>\n",
       "      <td>1.402409e+01</td>\n",
       "      <td>8.297469e+00</td>\n",
       "      <td>1.555382e+00</td>\n",
       "      <td>2.484485e+01</td>\n",
       "      <td>39.303090</td>\n",
       "      <td>7.024194e+00</td>\n",
       "      <td>35.782223</td>\n",
       "      <td>5.002082e-01</td>\n",
       "      <td>3.676069e+00</td>\n",
       "      <td>5.629468e+00</td>\n",
       "      <td>4.774183e+00</td>\n",
       "      <td>7.726402e+00</td>\n",
       "      <td>8.413439e+00</td>\n",
       "      <td>8.246523e+00</td>\n",
       "      <td>1.400463e+01</td>\n",
       "      <td>5.577951e+00</td>\n",
       "      <td>1.162813e+01</td>\n",
       "      <td>6.373958e-04</td>\n",
       "      <td>2.813652e-03</td>\n",
       "      <td>8.293767e-02</td>\n",
       "      <td>2.076755e+00</td>\n",
       "      <td>9.411458e+01</td>\n",
       "      <td>4.243513e+01</td>\n",
       "      <td>1.281935e-01</td>\n",
       "      <td>4.677109e-02</td>\n",
       "      <td>1.782428e+05</td>\n",
       "      <td>5.102294e+04</td>\n",
       "      <td>2.319377e+04</td>\n",
       "      <td>4.373201e+04</td>\n",
       "      <td>3.361728e+04</td>\n",
       "      <td>669.755603</td>\n",
       "      <td>673.755631</td>\n",
       "      <td>0.633256</td>\n",
       "      <td>1.538997</td>\n",
       "      <td>11.469455</td>\n",
       "      <td>58.169101</td>\n",
       "      <td>3.010554</td>\n",
       "      <td>12.533072</td>\n",
       "      <td>0.046352</td>\n",
       "      <td>0.077568</td>\n",
       "      <td>36.937928</td>\n",
       "      <td>3.0</td>\n",
       "      <td>155.045981</td>\n",
       "      <td>3.0</td>\n",
       "      <td>13.743886</td>\n",
       "      <td>454.798089</td>\n",
       "      <td>11636.883942</td>\n",
       "      <td>193.994321</td>\n",
       "      <td>5010.664267</td>\n",
       "      <td>47.780365</td>\n",
       "      <td>13.191322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>NaN</td>\n",
       "      <td>9.190245e+03</td>\n",
       "      <td>9.188413e+03</td>\n",
       "      <td>9.192332e+03</td>\n",
       "      <td>4.832138e+00</td>\n",
       "      <td>2.671735e+02</td>\n",
       "      <td>1.126962e+05</td>\n",
       "      <td>1.418333e+01</td>\n",
       "      <td>8.672303e-01</td>\n",
       "      <td>3.301038e+01</td>\n",
       "      <td>3.301124e+01</td>\n",
       "      <td>8.859632e-01</td>\n",
       "      <td>2.190047e+01</td>\n",
       "      <td>26.464094</td>\n",
       "      <td>5.640861e+00</td>\n",
       "      <td>5.705150e-01</td>\n",
       "      <td>2.294831e+04</td>\n",
       "      <td>2.471307e+01</td>\n",
       "      <td>1.198753e+01</td>\n",
       "      <td>7.343239e+03</td>\n",
       "      <td>7.342333e+03</td>\n",
       "      <td>9.901383e+03</td>\n",
       "      <td>9.896992e+03</td>\n",
       "      <td>8.321852e+03</td>\n",
       "      <td>2.679738e+03</td>\n",
       "      <td>1.184159e+01</td>\n",
       "      <td>7.481640e+02</td>\n",
       "      <td>1.312256e+02</td>\n",
       "      <td>6.018248e+03</td>\n",
       "      <td>7.297044e+01</td>\n",
       "      <td>1.110976e+02</td>\n",
       "      <td>1.508131e-01</td>\n",
       "      <td>21.533121</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.416135e+04</td>\n",
       "      <td>7.822086</td>\n",
       "      <td>6.961656e-02</td>\n",
       "      <td>8.518462e+03</td>\n",
       "      <td>1.606926e+05</td>\n",
       "      <td>1.140700e+00</td>\n",
       "      <td>3.000784e+00</td>\n",
       "      <td>9.256354e-01</td>\n",
       "      <td>1.578672e+00</td>\n",
       "      <td>2.604919e+01</td>\n",
       "      <td>4.409746e+04</td>\n",
       "      <td>2.374839e+01</td>\n",
       "      <td>1.506827e+00</td>\n",
       "      <td>2.596911e+00</td>\n",
       "      <td>5.690561e+03</td>\n",
       "      <td>2.090475e+01</td>\n",
       "      <td>3.672850e+04</td>\n",
       "      <td>1.489456e+00</td>\n",
       "      <td>2.672991e+00</td>\n",
       "      <td>2.383117e+00</td>\n",
       "      <td>3.164229e+00</td>\n",
       "      <td>1.647408e+04</td>\n",
       "      <td>1.659953e+04</td>\n",
       "      <td>2.858347e+01</td>\n",
       "      <td>1.048098e-01</td>\n",
       "      <td>7.264648e+02</td>\n",
       "      <td>5.338218e+01</td>\n",
       "      <td>9.711845e+01</td>\n",
       "      <td>1.753308e+01</td>\n",
       "      <td>9.208557e+00</td>\n",
       "      <td>1.904981e+00</td>\n",
       "      <td>3.231925e+01</td>\n",
       "      <td>22.617689</td>\n",
       "      <td>5.965411e+00</td>\n",
       "      <td>22.307239</td>\n",
       "      <td>1.350326e+00</td>\n",
       "      <td>2.324646e+00</td>\n",
       "      <td>3.382874e+00</td>\n",
       "      <td>3.037921e+00</td>\n",
       "      <td>4.701430e+00</td>\n",
       "      <td>7.359114e+00</td>\n",
       "      <td>4.683928e+00</td>\n",
       "      <td>8.038868e+00</td>\n",
       "      <td>3.293434e+00</td>\n",
       "      <td>5.644027e+00</td>\n",
       "      <td>2.710643e-02</td>\n",
       "      <td>5.616522e-02</td>\n",
       "      <td>4.935732e-01</td>\n",
       "      <td>1.830711e+00</td>\n",
       "      <td>9.036140e+00</td>\n",
       "      <td>3.621616e+01</td>\n",
       "      <td>3.646130e-01</td>\n",
       "      <td>3.775338e-01</td>\n",
       "      <td>1.815748e+05</td>\n",
       "      <td>4.991124e+04</td>\n",
       "      <td>2.300656e+04</td>\n",
       "      <td>4.507298e+04</td>\n",
       "      <td>2.815387e+04</td>\n",
       "      <td>44.729163</td>\n",
       "      <td>44.729272</td>\n",
       "      <td>0.993401</td>\n",
       "      <td>1.760569</td>\n",
       "      <td>6.627271</td>\n",
       "      <td>25.548212</td>\n",
       "      <td>3.275893</td>\n",
       "      <td>8.150964</td>\n",
       "      <td>0.411496</td>\n",
       "      <td>0.407996</td>\n",
       "      <td>23.924584</td>\n",
       "      <td>0.0</td>\n",
       "      <td>129.040594</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.671178</td>\n",
       "      <td>375.385500</td>\n",
       "      <td>7625.988281</td>\n",
       "      <td>198.629496</td>\n",
       "      <td>3693.122590</td>\n",
       "      <td>7.311822</td>\n",
       "      <td>8.159980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>NaN</td>\n",
       "      <td>5.000000e+02</td>\n",
       "      <td>5.000000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>5.310000e+00</td>\n",
       "      <td>4.930000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>6.100000e+02</td>\n",
       "      <td>6.140000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-9.500000e-09</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.693510e+03</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>540.000000</td>\n",
       "      <td>544.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.640000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.920000</td>\n",
       "      <td>55.730000</td>\n",
       "      <td>0.010000</td>\n",
       "      <td>44.210000</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8.000000e+03</td>\n",
       "      <td>8.000000e+03</td>\n",
       "      <td>8.000000e+03</td>\n",
       "      <td>9.490000e+00</td>\n",
       "      <td>2.516500e+02</td>\n",
       "      <td>4.600000e+04</td>\n",
       "      <td>1.189000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>6.750000e+02</td>\n",
       "      <td>6.790000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.600000e+01</td>\n",
       "      <td>55.000000</td>\n",
       "      <td>8.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>5.950000e+03</td>\n",
       "      <td>3.150000e+01</td>\n",
       "      <td>1.500000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>4.546458e+03</td>\n",
       "      <td>4.531800e+03</td>\n",
       "      <td>3.000000e+03</td>\n",
       "      <td>7.281875e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.103300e+02</td>\n",
       "      <td>6.540000e+02</td>\n",
       "      <td>6.500000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>27.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.340000e+04</td>\n",
       "      <td>13.530000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.909200e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>7.000000e+00</td>\n",
       "      <td>8.695000e+03</td>\n",
       "      <td>5.500000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.284000e+03</td>\n",
       "      <td>4.300000e+01</td>\n",
       "      <td>1.470000e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>3.080000e+03</td>\n",
       "      <td>1.722000e+03</td>\n",
       "      <td>3.540000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>9.600000e+01</td>\n",
       "      <td>1.160000e+02</td>\n",
       "      <td>4.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>6.000000e+00</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>4.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>8.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>8.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>9.130000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>5.073100e+04</td>\n",
       "      <td>2.089200e+04</td>\n",
       "      <td>8.300000e+03</td>\n",
       "      <td>1.500000e+04</td>\n",
       "      <td>1.510675e+04</td>\n",
       "      <td>645.000000</td>\n",
       "      <td>649.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>39.800000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>59.440000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>175.230000</td>\n",
       "      <td>5627.000000</td>\n",
       "      <td>44.440000</td>\n",
       "      <td>2208.000000</td>\n",
       "      <td>45.000000</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.290000e+04</td>\n",
       "      <td>1.287500e+04</td>\n",
       "      <td>1.280000e+04</td>\n",
       "      <td>1.262000e+01</td>\n",
       "      <td>3.779900e+02</td>\n",
       "      <td>6.500000e+04</td>\n",
       "      <td>1.784000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>6.900000e+02</td>\n",
       "      <td>6.940000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.100000e+01</td>\n",
       "      <td>74.000000</td>\n",
       "      <td>1.100000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.132400e+04</td>\n",
       "      <td>5.030000e+01</td>\n",
       "      <td>2.200000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>9.329720e+03</td>\n",
       "      <td>9.309675e+03</td>\n",
       "      <td>7.000000e+03</td>\n",
       "      <td>1.525940e+03</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>6.005700e+02</td>\n",
       "      <td>6.990000e+02</td>\n",
       "      <td>6.950000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>44.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.100000e+05</td>\n",
       "      <td>18.840000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>7.924000e+04</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.300000e+01</td>\n",
       "      <td>2.312700e+04</td>\n",
       "      <td>7.200000e+01</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>4.413000e+03</td>\n",
       "      <td>5.800000e+01</td>\n",
       "      <td>2.540000e+04</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>4.000000e+00</td>\n",
       "      <td>7.335000e+03</td>\n",
       "      <td>5.442000e+03</td>\n",
       "      <td>6.020000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.300000e+02</td>\n",
       "      <td>1.640000e+02</td>\n",
       "      <td>8.000000e+00</td>\n",
       "      <td>6.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.400000e+01</td>\n",
       "      <td>37.000000</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>33.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>4.000000e+00</td>\n",
       "      <td>7.000000e+00</td>\n",
       "      <td>6.000000e+00</td>\n",
       "      <td>7.000000e+00</td>\n",
       "      <td>1.200000e+01</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>1.100000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>1.000000e+02</td>\n",
       "      <td>3.750000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.142985e+05</td>\n",
       "      <td>3.786400e+04</td>\n",
       "      <td>1.630000e+04</td>\n",
       "      <td>3.269600e+04</td>\n",
       "      <td>2.651650e+04</td>\n",
       "      <td>670.000000</td>\n",
       "      <td>674.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>60.200000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>36.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>119.140000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>352.770000</td>\n",
       "      <td>10028.390000</td>\n",
       "      <td>133.160000</td>\n",
       "      <td>4146.110000</td>\n",
       "      <td>45.000000</td>\n",
       "      <td>14.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.000000e+04</td>\n",
       "      <td>2.000000e+04</td>\n",
       "      <td>2.000000e+04</td>\n",
       "      <td>1.599000e+01</td>\n",
       "      <td>5.933200e+02</td>\n",
       "      <td>9.300000e+04</td>\n",
       "      <td>2.449000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>7.150000e+02</td>\n",
       "      <td>7.190000e+02</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>5.000000e+01</td>\n",
       "      <td>92.000000</td>\n",
       "      <td>1.400000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.024600e+04</td>\n",
       "      <td>6.940000e+01</td>\n",
       "      <td>3.100000e+01</td>\n",
       "      <td>6.149940e+03</td>\n",
       "      <td>6.146310e+03</td>\n",
       "      <td>1.694087e+04</td>\n",
       "      <td>1.691671e+04</td>\n",
       "      <td>1.389910e+04</td>\n",
       "      <td>3.108062e+03</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.743750e+03</td>\n",
       "      <td>7.340000e+02</td>\n",
       "      <td>7.300000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.479950e+05</td>\n",
       "      <td>24.620000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.132040e+05</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>2.400000e+01</td>\n",
       "      <td>4.609500e+04</td>\n",
       "      <td>8.600000e+01</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>4.000000e+00</td>\n",
       "      <td>7.598000e+03</td>\n",
       "      <td>7.200000e+01</td>\n",
       "      <td>4.320000e+04</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>6.000000e+00</td>\n",
       "      <td>1.878300e+04</td>\n",
       "      <td>1.418700e+04</td>\n",
       "      <td>8.310000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.540000e+02</td>\n",
       "      <td>2.320000e+02</td>\n",
       "      <td>1.700000e+01</td>\n",
       "      <td>1.100000e+01</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>3.000000e+01</td>\n",
       "      <td>57.000000</td>\n",
       "      <td>1.100000e+01</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>7.000000e+00</td>\n",
       "      <td>6.000000e+00</td>\n",
       "      <td>1.000000e+01</td>\n",
       "      <td>1.100000e+01</td>\n",
       "      <td>1.000000e+01</td>\n",
       "      <td>1.800000e+01</td>\n",
       "      <td>7.000000e+00</td>\n",
       "      <td>1.400000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>1.000000e+02</td>\n",
       "      <td>7.140000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.577550e+05</td>\n",
       "      <td>6.435000e+04</td>\n",
       "      <td>3.030000e+04</td>\n",
       "      <td>5.880425e+04</td>\n",
       "      <td>4.376900e+04</td>\n",
       "      <td>695.000000</td>\n",
       "      <td>699.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>78.600000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>56.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>213.260000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>620.175000</td>\n",
       "      <td>16151.890000</td>\n",
       "      <td>284.190000</td>\n",
       "      <td>6850.172500</td>\n",
       "      <td>50.000000</td>\n",
       "      <td>18.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000e+04</td>\n",
       "      <td>4.000000e+04</td>\n",
       "      <td>4.000000e+04</td>\n",
       "      <td>3.099000e+01</td>\n",
       "      <td>1.719830e+03</td>\n",
       "      <td>1.100000e+08</td>\n",
       "      <td>9.990000e+02</td>\n",
       "      <td>5.800000e+01</td>\n",
       "      <td>8.450000e+02</td>\n",
       "      <td>8.500000e+02</td>\n",
       "      <td>3.300000e+01</td>\n",
       "      <td>2.260000e+02</td>\n",
       "      <td>129.000000</td>\n",
       "      <td>1.010000e+02</td>\n",
       "      <td>8.600000e+01</td>\n",
       "      <td>2.904836e+06</td>\n",
       "      <td>8.923000e+02</td>\n",
       "      <td>1.760000e+02</td>\n",
       "      <td>4.000000e+04</td>\n",
       "      <td>4.000000e+04</td>\n",
       "      <td>6.329688e+04</td>\n",
       "      <td>6.329688e+04</td>\n",
       "      <td>4.000000e+04</td>\n",
       "      <td>2.819250e+04</td>\n",
       "      <td>1.484340e+03</td>\n",
       "      <td>3.985955e+04</td>\n",
       "      <td>7.174719e+03</td>\n",
       "      <td>4.219205e+04</td>\n",
       "      <td>8.500000e+02</td>\n",
       "      <td>8.450000e+02</td>\n",
       "      <td>2.000000e+01</td>\n",
       "      <td>226.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.874821e+06</td>\n",
       "      <td>69.490000</td>\n",
       "      <td>1.400000e+01</td>\n",
       "      <td>9.152545e+06</td>\n",
       "      <td>9.971659e+06</td>\n",
       "      <td>1.800000e+01</td>\n",
       "      <td>5.700000e+01</td>\n",
       "      <td>2.500000e+01</td>\n",
       "      <td>5.100000e+01</td>\n",
       "      <td>5.110000e+02</td>\n",
       "      <td>1.837038e+06</td>\n",
       "      <td>1.000000e+03</td>\n",
       "      <td>2.800000e+01</td>\n",
       "      <td>6.000000e+01</td>\n",
       "      <td>1.170668e+06</td>\n",
       "      <td>2.390000e+02</td>\n",
       "      <td>9.999999e+06</td>\n",
       "      <td>4.800000e+01</td>\n",
       "      <td>1.110000e+02</td>\n",
       "      <td>6.700000e+01</td>\n",
       "      <td>6.400000e+01</td>\n",
       "      <td>9.580840e+05</td>\n",
       "      <td>7.111400e+05</td>\n",
       "      <td>3.396000e+02</td>\n",
       "      <td>1.000000e+01</td>\n",
       "      <td>2.499250e+05</td>\n",
       "      <td>9.990000e+02</td>\n",
       "      <td>9.990000e+02</td>\n",
       "      <td>5.470000e+02</td>\n",
       "      <td>3.820000e+02</td>\n",
       "      <td>9.400000e+01</td>\n",
       "      <td>6.610000e+02</td>\n",
       "      <td>202.000000</td>\n",
       "      <td>2.500000e+01</td>\n",
       "      <td>202.000000</td>\n",
       "      <td>5.800000e+01</td>\n",
       "      <td>5.000000e+01</td>\n",
       "      <td>7.200000e+01</td>\n",
       "      <td>7.100000e+01</td>\n",
       "      <td>8.600000e+01</td>\n",
       "      <td>1.590000e+02</td>\n",
       "      <td>9.100000e+01</td>\n",
       "      <td>1.510000e+02</td>\n",
       "      <td>6.500000e+01</td>\n",
       "      <td>1.010000e+02</td>\n",
       "      <td>7.000000e+00</td>\n",
       "      <td>4.000000e+00</td>\n",
       "      <td>5.800000e+01</td>\n",
       "      <td>3.200000e+01</td>\n",
       "      <td>1.000000e+02</td>\n",
       "      <td>1.000000e+02</td>\n",
       "      <td>1.200000e+01</td>\n",
       "      <td>8.500000e+01</td>\n",
       "      <td>9.999999e+06</td>\n",
       "      <td>3.408095e+06</td>\n",
       "      <td>1.569000e+06</td>\n",
       "      <td>2.118996e+06</td>\n",
       "      <td>1.110019e+06</td>\n",
       "      <td>845.000000</td>\n",
       "      <td>850.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>27.000000</td>\n",
       "      <td>82.000000</td>\n",
       "      <td>434.300000</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>106.000000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>185.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>943.940000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>37.000000</td>\n",
       "      <td>2680.890000</td>\n",
       "      <td>40306.410000</td>\n",
       "      <td>1407.860000</td>\n",
       "      <td>33601.000000</td>\n",
       "      <td>521.350000</td>\n",
       "      <td>181.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       member_id     loan_amnt   funded_amnt  funded_amnt_inv      int_rate  \\\n",
       "count        0.0  2.260668e+06  2.260668e+06     2.260668e+06  2.260668e+06   \n",
       "mean         NaN  1.504693e+04  1.504166e+04     1.502344e+04  1.309283e+01   \n",
       "std          NaN  9.190245e+03  9.188413e+03     9.192332e+03  4.832138e+00   \n",
       "min          NaN  5.000000e+02  5.000000e+02     0.000000e+00  5.310000e+00   \n",
       "25%          NaN  8.000000e+03  8.000000e+03     8.000000e+03  9.490000e+00   \n",
       "50%          NaN  1.290000e+04  1.287500e+04     1.280000e+04  1.262000e+01   \n",
       "75%          NaN  2.000000e+04  2.000000e+04     2.000000e+04  1.599000e+01   \n",
       "max          NaN  4.000000e+04  4.000000e+04     4.000000e+04  3.099000e+01   \n",
       "\n",
       "        installment    annual_inc           dti   delinq_2yrs  fico_range_low  \\\n",
       "count  2.260668e+06  2.260664e+06  2.258957e+06  2.260639e+06    2.260668e+06   \n",
       "mean   4.458068e+02  7.799243e+04  1.882420e+01  3.068792e-01    6.985882e+02   \n",
       "std    2.671735e+02  1.126962e+05  1.418333e+01  8.672303e-01    3.301038e+01   \n",
       "min    4.930000e+00  0.000000e+00 -1.000000e+00  0.000000e+00    6.100000e+02   \n",
       "25%    2.516500e+02  4.600000e+04  1.189000e+01  0.000000e+00    6.750000e+02   \n",
       "50%    3.779900e+02  6.500000e+04  1.784000e+01  0.000000e+00    6.900000e+02   \n",
       "75%    5.933200e+02  9.300000e+04  2.449000e+01  0.000000e+00    7.150000e+02   \n",
       "max    1.719830e+03  1.100000e+08  9.990000e+02  5.800000e+01    8.450000e+02   \n",
       "\n",
       "       fico_range_high  inq_last_6mths  mths_since_last_delinq  \\\n",
       "count     2.260668e+06    2.260638e+06            1.102166e+06   \n",
       "mean      7.025884e+02    5.768354e-01            3.454092e+01   \n",
       "std       3.301124e+01    8.859632e-01            2.190047e+01   \n",
       "min       6.140000e+02    0.000000e+00            0.000000e+00   \n",
       "25%       6.790000e+02    0.000000e+00            1.600000e+01   \n",
       "50%       6.940000e+02    0.000000e+00            3.100000e+01   \n",
       "75%       7.190000e+02    1.000000e+00            5.000000e+01   \n",
       "max       8.500000e+02    3.300000e+01            2.260000e+02   \n",
       "\n",
       "       mths_since_last_record      open_acc       pub_rec     revol_bal  \\\n",
       "count           359156.000000  2.260639e+06  2.260639e+06  2.260668e+06   \n",
       "mean                72.312842  1.161240e+01  1.975278e-01  1.665846e+04   \n",
       "std                 26.464094  5.640861e+00  5.705150e-01  2.294831e+04   \n",
       "min                  0.000000  0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%                 55.000000  8.000000e+00  0.000000e+00  5.950000e+03   \n",
       "50%                 74.000000  1.100000e+01  0.000000e+00  1.132400e+04   \n",
       "75%                 92.000000  1.400000e+01  0.000000e+00  2.024600e+04   \n",
       "max                129.000000  1.010000e+02  8.600000e+01  2.904836e+06   \n",
       "\n",
       "         revol_util     total_acc     out_prncp  out_prncp_inv   total_pymnt  \\\n",
       "count  2.258866e+06  2.260639e+06  2.260668e+06   2.260668e+06  2.260668e+06   \n",
       "mean   5.033770e+01  2.416255e+01  4.206891e+03   4.205965e+03  1.208256e+04   \n",
       "std    2.471307e+01  1.198753e+01  7.343239e+03   7.342333e+03  9.901383e+03   \n",
       "min    0.000000e+00  1.000000e+00  0.000000e+00   0.000000e+00  0.000000e+00   \n",
       "25%    3.150000e+01  1.500000e+01  0.000000e+00   0.000000e+00  4.546458e+03   \n",
       "50%    5.030000e+01  2.200000e+01  0.000000e+00   0.000000e+00  9.329720e+03   \n",
       "75%    6.940000e+01  3.100000e+01  6.149940e+03   6.146310e+03  1.694087e+04   \n",
       "max    8.923000e+02  1.760000e+02  4.000000e+04   4.000000e+04  6.329688e+04   \n",
       "\n",
       "       total_pymnt_inv  total_rec_prncp  total_rec_int  total_rec_late_fee  \\\n",
       "count     2.260668e+06     2.260668e+06   2.260668e+06        2.260668e+06   \n",
       "mean      1.206439e+04     9.505772e+03   2.431388e+03        1.518453e+00   \n",
       "std       9.896992e+03     8.321852e+03   2.679738e+03        1.184159e+01   \n",
       "min       0.000000e+00     0.000000e+00   0.000000e+00       -9.500000e-09   \n",
       "25%       4.531800e+03     3.000000e+03   7.281875e+02        0.000000e+00   \n",
       "50%       9.309675e+03     7.000000e+03   1.525940e+03        0.000000e+00   \n",
       "75%       1.691671e+04     1.389910e+04   3.108062e+03        0.000000e+00   \n",
       "max       6.329688e+04     4.000000e+04   2.819250e+04        1.484340e+03   \n",
       "\n",
       "         recoveries  collection_recovery_fee  last_pymnt_amnt  \\\n",
       "count  2.260668e+06             2.260668e+06     2.260668e+06   \n",
       "mean   1.438791e+02             2.398257e+01     3.429346e+03   \n",
       "std    7.481640e+02             1.312256e+02     6.018248e+03   \n",
       "min    0.000000e+00             0.000000e+00     0.000000e+00   \n",
       "25%    0.000000e+00             0.000000e+00     3.103300e+02   \n",
       "50%    0.000000e+00             0.000000e+00     6.005700e+02   \n",
       "75%    0.000000e+00             0.000000e+00     3.743750e+03   \n",
       "max    3.985955e+04             7.174719e+03     4.219205e+04   \n",
       "\n",
       "       last_fico_range_high  last_fico_range_low  collections_12_mths_ex_med  \\\n",
       "count          2.260668e+06         2.260668e+06                2.260523e+06   \n",
       "mean           6.876610e+02         6.755397e+02                1.814580e-02   \n",
       "std            7.297044e+01         1.110976e+02                1.508131e-01   \n",
       "min            0.000000e+00         0.000000e+00                0.000000e+00   \n",
       "25%            6.540000e+02         6.500000e+02                0.000000e+00   \n",
       "50%            6.990000e+02         6.950000e+02                0.000000e+00   \n",
       "75%            7.340000e+02         7.300000e+02                0.000000e+00   \n",
       "max            8.500000e+02         8.450000e+02                2.000000e+01   \n",
       "\n",
       "       mths_since_last_major_derog  policy_code  annual_inc_joint  \\\n",
       "count                580775.000000    2260668.0      1.207100e+05   \n",
       "mean                     44.164220          1.0      1.236246e+05   \n",
       "std                      21.533121          0.0      7.416135e+04   \n",
       "min                       0.000000          1.0      5.693510e+03   \n",
       "25%                      27.000000          1.0      8.340000e+04   \n",
       "50%                      44.000000          1.0      1.100000e+05   \n",
       "75%                      62.000000          1.0      1.479950e+05   \n",
       "max                     226.000000          1.0      7.874821e+06   \n",
       "\n",
       "           dti_joint  acc_now_delinq  tot_coll_amt   tot_cur_bal  \\\n",
       "count  120706.000000    2.260639e+06  2.190392e+06  2.190392e+06   \n",
       "mean       19.251817    4.147942e-03  2.327317e+02  1.424922e+05   \n",
       "std         7.822086    6.961656e-02  8.518462e+03  1.606926e+05   \n",
       "min         0.000000    0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%        13.530000    0.000000e+00  0.000000e+00  2.909200e+04   \n",
       "50%        18.840000    0.000000e+00  0.000000e+00  7.924000e+04   \n",
       "75%        24.620000    0.000000e+00  0.000000e+00  2.132040e+05   \n",
       "max        69.490000    1.400000e+01  9.152545e+06  9.971659e+06   \n",
       "\n",
       "        open_acc_6m   open_act_il   open_il_12m   open_il_24m  \\\n",
       "count  1.394538e+06  1.394539e+06  1.394539e+06  1.394539e+06   \n",
       "mean   9.344199e-01  2.779407e+00  6.764314e-01  1.562752e+00   \n",
       "std    1.140700e+00  3.000784e+00  9.256354e-01  1.578672e+00   \n",
       "min    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%    0.000000e+00  1.000000e+00  0.000000e+00  0.000000e+00   \n",
       "50%    1.000000e+00  2.000000e+00  0.000000e+00  1.000000e+00   \n",
       "75%    1.000000e+00  3.000000e+00  1.000000e+00  2.000000e+00   \n",
       "max    1.800000e+01  5.700000e+01  2.500000e+01  5.100000e+01   \n",
       "\n",
       "       mths_since_rcnt_il  total_bal_il       il_util   open_rv_12m  \\\n",
       "count        1.350744e+06  1.394539e+06  1.191818e+06  1.394539e+06   \n",
       "mean         2.122236e+01  3.550665e+04  6.914098e+01  1.290133e+00   \n",
       "std          2.604919e+01  4.409746e+04  2.374839e+01  1.506827e+00   \n",
       "min          0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%          7.000000e+00  8.695000e+03  5.500000e+01  0.000000e+00   \n",
       "50%          1.300000e+01  2.312700e+04  7.200000e+01  1.000000e+00   \n",
       "75%          2.400000e+01  4.609500e+04  8.600000e+01  2.000000e+00   \n",
       "max          5.110000e+02  1.837038e+06  1.000000e+03  2.800000e+01   \n",
       "\n",
       "        open_rv_24m    max_bal_bc      all_util  total_rev_hi_lim  \\\n",
       "count  1.394539e+06  1.394539e+06  1.394320e+06      2.190392e+06   \n",
       "mean   2.749923e+00  5.806393e+03  5.703230e+01      3.457394e+04   \n",
       "std    2.596911e+00  5.690561e+03  2.090475e+01      3.672850e+04   \n",
       "min    0.000000e+00  0.000000e+00  0.000000e+00      0.000000e+00   \n",
       "25%    1.000000e+00  2.284000e+03  4.300000e+01      1.470000e+04   \n",
       "50%    2.000000e+00  4.413000e+03  5.800000e+01      2.540000e+04   \n",
       "75%    4.000000e+00  7.598000e+03  7.200000e+01      4.320000e+04   \n",
       "max    6.000000e+01  1.170668e+06  2.390000e+02      9.999999e+06   \n",
       "\n",
       "             inq_fi   total_cu_tl  inq_last_12m  acc_open_past_24mths  \\\n",
       "count  1.394539e+06  1.394538e+06  1.394538e+06          2.210638e+06   \n",
       "mean   1.012867e+00  1.477304e+00  2.036667e+00          4.521656e+00   \n",
       "std    1.489456e+00  2.672991e+00  2.383117e+00          3.164229e+00   \n",
       "min    0.000000e+00  0.000000e+00  0.000000e+00          0.000000e+00   \n",
       "25%    0.000000e+00  0.000000e+00  0.000000e+00          2.000000e+00   \n",
       "50%    1.000000e+00  0.000000e+00  1.000000e+00          4.000000e+00   \n",
       "75%    1.000000e+00  2.000000e+00  3.000000e+00          6.000000e+00   \n",
       "max    4.800000e+01  1.110000e+02  6.700000e+01          6.400000e+01   \n",
       "\n",
       "        avg_cur_bal  bc_open_to_buy       bc_util  chargeoff_within_12_mths  \\\n",
       "count  2.190322e+06    2.185733e+06  2.184597e+06              2.260523e+06   \n",
       "mean   1.354780e+04    1.139426e+04  5.789995e+01              8.464413e-03   \n",
       "std    1.647408e+04    1.659953e+04  2.858347e+01              1.048098e-01   \n",
       "min    0.000000e+00    0.000000e+00  0.000000e+00              0.000000e+00   \n",
       "25%    3.080000e+03    1.722000e+03  3.540000e+01              0.000000e+00   \n",
       "50%    7.335000e+03    5.442000e+03  6.020000e+01              0.000000e+00   \n",
       "75%    1.878300e+04    1.418700e+04  8.310000e+01              0.000000e+00   \n",
       "max    9.580840e+05    7.111400e+05  3.396000e+02              1.000000e+01   \n",
       "\n",
       "        delinq_amnt  mo_sin_old_il_acct  mo_sin_old_rev_tl_op  \\\n",
       "count  2.260639e+06        2.121597e+06          2.190391e+06   \n",
       "mean   1.236983e+01        1.257378e+02          1.814916e+02   \n",
       "std    7.264648e+02        5.338218e+01          9.711845e+01   \n",
       "min    0.000000e+00        0.000000e+00          1.000000e+00   \n",
       "25%    0.000000e+00        9.600000e+01          1.160000e+02   \n",
       "50%    0.000000e+00        1.300000e+02          1.640000e+02   \n",
       "75%    0.000000e+00        1.540000e+02          2.320000e+02   \n",
       "max    2.499250e+05        9.990000e+02          9.990000e+02   \n",
       "\n",
       "       mo_sin_rcnt_rev_tl_op  mo_sin_rcnt_tl      mort_acc  \\\n",
       "count           2.190391e+06    2.190392e+06  2.210638e+06   \n",
       "mean            1.402409e+01    8.297469e+00  1.555382e+00   \n",
       "std             1.753308e+01    9.208557e+00  1.904981e+00   \n",
       "min             0.000000e+00    0.000000e+00  0.000000e+00   \n",
       "25%             4.000000e+00    3.000000e+00  0.000000e+00   \n",
       "50%             8.000000e+00    6.000000e+00  1.000000e+00   \n",
       "75%             1.700000e+01    1.100000e+01  3.000000e+00   \n",
       "max             5.470000e+02    3.820000e+02  9.400000e+01   \n",
       "\n",
       "       mths_since_recent_bc  mths_since_recent_bc_dlq  mths_since_recent_inq  \\\n",
       "count          2.187256e+06             519701.000000           1.965233e+06   \n",
       "mean           2.484485e+01                 39.303090           7.024194e+00   \n",
       "std            3.231925e+01                 22.617689           5.965411e+00   \n",
       "min            0.000000e+00                  0.000000           0.000000e+00   \n",
       "25%            6.000000e+00                 21.000000           2.000000e+00   \n",
       "50%            1.400000e+01                 37.000000           5.000000e+00   \n",
       "75%            3.000000e+01                 57.000000           1.100000e+01   \n",
       "max            6.610000e+02                202.000000           2.500000e+01   \n",
       "\n",
       "       mths_since_recent_revol_delinq  num_accts_ever_120_pd  num_actv_bc_tl  \\\n",
       "count                   740359.000000           2.190392e+06    2.190392e+06   \n",
       "mean                        35.782223           5.002082e-01    3.676069e+00   \n",
       "std                         22.307239           1.350326e+00    2.324646e+00   \n",
       "min                          0.000000           0.000000e+00    0.000000e+00   \n",
       "25%                         17.000000           0.000000e+00    2.000000e+00   \n",
       "50%                         33.000000           0.000000e+00    3.000000e+00   \n",
       "75%                         51.000000           0.000000e+00    5.000000e+00   \n",
       "max                        202.000000           5.800000e+01    5.000000e+01   \n",
       "\n",
       "       num_actv_rev_tl   num_bc_sats     num_bc_tl     num_il_tl  \\\n",
       "count     2.190392e+06  2.202078e+06  2.190392e+06  2.190392e+06   \n",
       "mean      5.629468e+00  4.774183e+00  7.726402e+00  8.413439e+00   \n",
       "std       3.382874e+00  3.037921e+00  4.701430e+00  7.359114e+00   \n",
       "min       0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
       "25%       3.000000e+00  3.000000e+00  4.000000e+00  3.000000e+00   \n",
       "50%       5.000000e+00  4.000000e+00  7.000000e+00  6.000000e+00   \n",
       "75%       7.000000e+00  6.000000e+00  1.000000e+01  1.100000e+01   \n",
       "max       7.200000e+01  7.100000e+01  8.600000e+01  1.590000e+02   \n",
       "\n",
       "       num_op_rev_tl  num_rev_accts  num_rev_tl_bal_gt_0      num_sats  \\\n",
       "count   2.190392e+06   2.190391e+06         2.190392e+06  2.202078e+06   \n",
       "mean    8.246523e+00   1.400463e+01         5.577951e+00  1.162813e+01   \n",
       "std     4.683928e+00   8.038868e+00         3.293434e+00  5.644027e+00   \n",
       "min     0.000000e+00   0.000000e+00         0.000000e+00  0.000000e+00   \n",
       "25%     5.000000e+00   8.000000e+00         3.000000e+00  8.000000e+00   \n",
       "50%     7.000000e+00   1.200000e+01         5.000000e+00  1.100000e+01   \n",
       "75%     1.000000e+01   1.800000e+01         7.000000e+00  1.400000e+01   \n",
       "max     9.100000e+01   1.510000e+02         6.500000e+01  1.010000e+02   \n",
       "\n",
       "       num_tl_120dpd_2m  num_tl_30dpd  num_tl_90g_dpd_24m  num_tl_op_past_12m  \\\n",
       "count      2.107011e+06  2.190392e+06        2.190392e+06        2.190392e+06   \n",
       "mean       6.373958e-04  2.813652e-03        8.293767e-02        2.076755e+00   \n",
       "std        2.710643e-02  5.616522e-02        4.935732e-01        1.830711e+00   \n",
       "min        0.000000e+00  0.000000e+00        0.000000e+00        0.000000e+00   \n",
       "25%        0.000000e+00  0.000000e+00        0.000000e+00        1.000000e+00   \n",
       "50%        0.000000e+00  0.000000e+00        0.000000e+00        2.000000e+00   \n",
       "75%        0.000000e+00  0.000000e+00        0.000000e+00        3.000000e+00   \n",
       "max        7.000000e+00  4.000000e+00        5.800000e+01        3.200000e+01   \n",
       "\n",
       "       pct_tl_nvr_dlq  percent_bc_gt_75  pub_rec_bankruptcies     tax_liens  \\\n",
       "count    2.190237e+06      2.185289e+06          2.259303e+06  2.260563e+06   \n",
       "mean     9.411458e+01      4.243513e+01          1.281935e-01  4.677109e-02   \n",
       "std      9.036140e+00      3.621616e+01          3.646130e-01  3.775338e-01   \n",
       "min      0.000000e+00      0.000000e+00          0.000000e+00  0.000000e+00   \n",
       "25%      9.130000e+01      0.000000e+00          0.000000e+00  0.000000e+00   \n",
       "50%      1.000000e+02      3.750000e+01          0.000000e+00  0.000000e+00   \n",
       "75%      1.000000e+02      7.140000e+01          0.000000e+00  0.000000e+00   \n",
       "max      1.000000e+02      1.000000e+02          1.200000e+01  8.500000e+01   \n",
       "\n",
       "       tot_hi_cred_lim  total_bal_ex_mort  total_bc_limit  \\\n",
       "count     2.190392e+06       2.210638e+06    2.210638e+06   \n",
       "mean      1.782428e+05       5.102294e+04    2.319377e+04   \n",
       "std       1.815748e+05       4.991124e+04    2.300656e+04   \n",
       "min       0.000000e+00       0.000000e+00    0.000000e+00   \n",
       "25%       5.073100e+04       2.089200e+04    8.300000e+03   \n",
       "50%       1.142985e+05       3.786400e+04    1.630000e+04   \n",
       "75%       2.577550e+05       6.435000e+04    3.030000e+04   \n",
       "max       9.999999e+06       3.408095e+06    1.569000e+06   \n",
       "\n",
       "       total_il_high_credit_limit  revol_bal_joint  sec_app_fico_range_low  \\\n",
       "count                2.190392e+06     1.080200e+05           108021.000000   \n",
       "mean                 4.373201e+04     3.361728e+04              669.755603   \n",
       "std                  4.507298e+04     2.815387e+04               44.729163   \n",
       "min                  0.000000e+00     0.000000e+00              540.000000   \n",
       "25%                  1.500000e+04     1.510675e+04              645.000000   \n",
       "50%                  3.269600e+04     2.651650e+04              670.000000   \n",
       "75%                  5.880425e+04     4.376900e+04              695.000000   \n",
       "max                  2.118996e+06     1.110019e+06              845.000000   \n",
       "\n",
       "       sec_app_fico_range_high  sec_app_inq_last_6mths  sec_app_mort_acc  \\\n",
       "count            108021.000000           108021.000000     108021.000000   \n",
       "mean                673.755631                0.633256          1.538997   \n",
       "std                  44.729272                0.993401          1.760569   \n",
       "min                 544.000000                0.000000          0.000000   \n",
       "25%                 649.000000                0.000000          0.000000   \n",
       "50%                 674.000000                0.000000          1.000000   \n",
       "75%                 699.000000                1.000000          2.000000   \n",
       "max                 850.000000                6.000000         27.000000   \n",
       "\n",
       "       sec_app_open_acc  sec_app_revol_util  sec_app_open_act_il  \\\n",
       "count     108021.000000       106184.000000        108021.000000   \n",
       "mean          11.469455           58.169101             3.010554   \n",
       "std            6.627271           25.548212             3.275893   \n",
       "min            0.000000            0.000000             0.000000   \n",
       "25%            7.000000           39.800000             1.000000   \n",
       "50%           10.000000           60.200000             2.000000   \n",
       "75%           15.000000           78.600000             4.000000   \n",
       "max           82.000000          434.300000            43.000000   \n",
       "\n",
       "       sec_app_num_rev_accts  sec_app_chargeoff_within_12_mths  \\\n",
       "count          108021.000000                     108021.000000   \n",
       "mean               12.533072                          0.046352   \n",
       "std                 8.150964                          0.411496   \n",
       "min                 0.000000                          0.000000   \n",
       "25%                 7.000000                          0.000000   \n",
       "50%                11.000000                          0.000000   \n",
       "75%                17.000000                          0.000000   \n",
       "max               106.000000                         21.000000   \n",
       "\n",
       "       sec_app_collections_12_mths_ex_med  \\\n",
       "count                       108021.000000   \n",
       "mean                             0.077568   \n",
       "std                              0.407996   \n",
       "min                              0.000000   \n",
       "25%                              0.000000   \n",
       "50%                              0.000000   \n",
       "75%                              0.000000   \n",
       "max                             23.000000   \n",
       "\n",
       "       sec_app_mths_since_last_major_derog  deferral_term  hardship_amount  \\\n",
       "count                         35942.000000        10917.0     10917.000000   \n",
       "mean                             36.937928            3.0       155.045981   \n",
       "std                              23.924584            0.0       129.040594   \n",
       "min                               0.000000            3.0         0.640000   \n",
       "25%                              16.000000            3.0        59.440000   \n",
       "50%                              36.000000            3.0       119.140000   \n",
       "75%                              56.000000            3.0       213.260000   \n",
       "max                             185.000000            3.0       943.940000   \n",
       "\n",
       "       hardship_length  hardship_dpd  \\\n",
       "count          10917.0  10917.000000   \n",
       "mean               3.0     13.743886   \n",
       "std                0.0      9.671178   \n",
       "min                3.0      0.000000   \n",
       "25%                3.0      5.000000   \n",
       "50%                3.0     15.000000   \n",
       "75%                3.0     22.000000   \n",
       "max                3.0     37.000000   \n",
       "\n",
       "       orig_projected_additional_accrued_interest  \\\n",
       "count                                 8651.000000   \n",
       "mean                                   454.798089   \n",
       "std                                    375.385500   \n",
       "min                                      1.920000   \n",
       "25%                                    175.230000   \n",
       "50%                                    352.770000   \n",
       "75%                                    620.175000   \n",
       "max                                   2680.890000   \n",
       "\n",
       "       hardship_payoff_balance_amount  hardship_last_payment_amount  \\\n",
       "count                    10917.000000                  10917.000000   \n",
       "mean                     11636.883942                    193.994321   \n",
       "std                       7625.988281                    198.629496   \n",
       "min                         55.730000                      0.010000   \n",
       "25%                       5627.000000                     44.440000   \n",
       "50%                      10028.390000                    133.160000   \n",
       "75%                      16151.890000                    284.190000   \n",
       "max                      40306.410000                   1407.860000   \n",
       "\n",
       "       settlement_amount  settlement_percentage  settlement_term  \n",
       "count       34246.000000           34246.000000     34246.000000  \n",
       "mean         5010.664267              47.780365        13.191322  \n",
       "std          3693.122590               7.311822         8.159980  \n",
       "min            44.210000               0.200000         0.000000  \n",
       "25%          2208.000000              45.000000         6.000000  \n",
       "50%          4146.110000              45.000000        14.000000  \n",
       "75%          6850.172500              50.000000        18.000000  \n",
       "max         33601.000000             521.350000       181.000000  "
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "5b491558",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "id                                                  0\n",
       "member_id                                     2260701\n",
       "loan_amnt                                          33\n",
       "funded_amnt                                        33\n",
       "funded_amnt_inv                                    33\n",
       "term                                               33\n",
       "int_rate                                           33\n",
       "installment                                        33\n",
       "grade                                              33\n",
       "sub_grade                                          33\n",
       "emp_title                                      167002\n",
       "emp_length                                     146940\n",
       "home_ownership                                     33\n",
       "annual_inc                                         37\n",
       "verification_status                                33\n",
       "issue_d                                            33\n",
       "loan_status                                        33\n",
       "pymnt_plan                                         33\n",
       "url                                                33\n",
       "desc                                          2134634\n",
       "purpose                                            33\n",
       "title                                           23358\n",
       "zip_code                                           34\n",
       "addr_state                                         33\n",
       "dti                                              1744\n",
       "delinq_2yrs                                        62\n",
       "earliest_cr_line                                   62\n",
       "fico_range_low                                     33\n",
       "fico_range_high                                    33\n",
       "inq_last_6mths                                     63\n",
       "mths_since_last_delinq                        1158535\n",
       "mths_since_last_record                        1901545\n",
       "open_acc                                           62\n",
       "pub_rec                                            62\n",
       "revol_bal                                          33\n",
       "revol_util                                       1835\n",
       "total_acc                                          62\n",
       "initial_list_status                                33\n",
       "out_prncp                                          33\n",
       "out_prncp_inv                                      33\n",
       "total_pymnt                                        33\n",
       "total_pymnt_inv                                    33\n",
       "total_rec_prncp                                    33\n",
       "total_rec_int                                      33\n",
       "total_rec_late_fee                                 33\n",
       "recoveries                                         33\n",
       "collection_recovery_fee                            33\n",
       "last_pymnt_d                                     2460\n",
       "last_pymnt_amnt                                    33\n",
       "next_pymnt_d                                  1345343\n",
       "last_credit_pull_d                                105\n",
       "last_fico_range_high                               33\n",
       "last_fico_range_low                                33\n",
       "collections_12_mths_ex_med                        178\n",
       "mths_since_last_major_derog                   1679926\n",
       "policy_code                                        33\n",
       "application_type                                   33\n",
       "annual_inc_joint                              2139991\n",
       "dti_joint                                     2139995\n",
       "verification_status_joint                     2144971\n",
       "acc_now_delinq                                     62\n",
       "tot_coll_amt                                    70309\n",
       "tot_cur_bal                                     70309\n",
       "open_acc_6m                                    866163\n",
       "open_act_il                                    866162\n",
       "open_il_12m                                    866162\n",
       "open_il_24m                                    866162\n",
       "mths_since_rcnt_il                             909957\n",
       "total_bal_il                                   866162\n",
       "il_util                                       1068883\n",
       "open_rv_12m                                    866162\n",
       "open_rv_24m                                    866162\n",
       "max_bal_bc                                     866162\n",
       "all_util                                       866381\n",
       "total_rev_hi_lim                                70309\n",
       "inq_fi                                         866162\n",
       "total_cu_tl                                    866163\n",
       "inq_last_12m                                   866163\n",
       "acc_open_past_24mths                            50063\n",
       "avg_cur_bal                                     70379\n",
       "bc_open_to_buy                                  74968\n",
       "bc_util                                         76104\n",
       "chargeoff_within_12_mths                          178\n",
       "delinq_amnt                                        62\n",
       "mo_sin_old_il_acct                             139104\n",
       "mo_sin_old_rev_tl_op                            70310\n",
       "mo_sin_rcnt_rev_tl_op                           70310\n",
       "mo_sin_rcnt_tl                                  70309\n",
       "mort_acc                                        50063\n",
       "mths_since_recent_bc                            73445\n",
       "mths_since_recent_bc_dlq                      1741000\n",
       "mths_since_recent_inq                          295468\n",
       "mths_since_recent_revol_delinq                1520342\n",
       "num_accts_ever_120_pd                           70309\n",
       "num_actv_bc_tl                                  70309\n",
       "num_actv_rev_tl                                 70309\n",
       "num_bc_sats                                     58623\n",
       "num_bc_tl                                       70309\n",
       "num_il_tl                                       70309\n",
       "num_op_rev_tl                                   70309\n",
       "num_rev_accts                                   70310\n",
       "num_rev_tl_bal_gt_0                             70309\n",
       "num_sats                                        58623\n",
       "num_tl_120dpd_2m                               153690\n",
       "num_tl_30dpd                                    70309\n",
       "num_tl_90g_dpd_24m                              70309\n",
       "num_tl_op_past_12m                              70309\n",
       "pct_tl_nvr_dlq                                  70464\n",
       "percent_bc_gt_75                                75412\n",
       "pub_rec_bankruptcies                             1398\n",
       "tax_liens                                         138\n",
       "tot_hi_cred_lim                                 70309\n",
       "total_bal_ex_mort                               50063\n",
       "total_bc_limit                                  50063\n",
       "total_il_high_credit_limit                      70309\n",
       "revol_bal_joint                               2152681\n",
       "sec_app_fico_range_low                        2152680\n",
       "sec_app_fico_range_high                       2152680\n",
       "sec_app_earliest_cr_line                      2152680\n",
       "sec_app_inq_last_6mths                        2152680\n",
       "sec_app_mort_acc                              2152680\n",
       "sec_app_open_acc                              2152680\n",
       "sec_app_revol_util                            2154517\n",
       "sec_app_open_act_il                           2152680\n",
       "sec_app_num_rev_accts                         2152680\n",
       "sec_app_chargeoff_within_12_mths              2152680\n",
       "sec_app_collections_12_mths_ex_med            2152680\n",
       "sec_app_mths_since_last_major_derog           2224759\n",
       "hardship_flag                                      33\n",
       "hardship_type                                 2249784\n",
       "hardship_reason                               2249784\n",
       "hardship_status                               2249784\n",
       "deferral_term                                 2249784\n",
       "hardship_amount                               2249784\n",
       "hardship_start_date                           2249784\n",
       "hardship_end_date                             2249784\n",
       "payment_plan_start_date                       2249784\n",
       "hardship_length                               2249784\n",
       "hardship_dpd                                  2249784\n",
       "hardship_loan_status                          2249784\n",
       "orig_projected_additional_accrued_interest    2252050\n",
       "hardship_payoff_balance_amount                2249784\n",
       "hardship_last_payment_amount                  2249784\n",
       "disbursement_method                                33\n",
       "debt_settlement_flag                               33\n",
       "debt_settlement_flag_date                     2226455\n",
       "settlement_status                             2226455\n",
       "settlement_date                               2226455\n",
       "settlement_amount                             2226455\n",
       "settlement_percentage                         2226455\n",
       "settlement_term                               2226455\n",
       "dtype: int64"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "b040b3d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2260701, 151)"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf399861",
   "metadata": {},
   "source": [
    "## 特征选择"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eaa35216",
   "metadata": {},
   "source": [
    "### 删除无用的列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "7c397811",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['id',\n",
       " 'term',\n",
       " 'grade',\n",
       " 'sub_grade',\n",
       " 'emp_title',\n",
       " 'emp_length',\n",
       " 'home_ownership',\n",
       " 'verification_status',\n",
       " 'issue_d',\n",
       " 'loan_status',\n",
       " 'pymnt_plan',\n",
       " 'url',\n",
       " 'desc',\n",
       " 'purpose',\n",
       " 'title',\n",
       " 'zip_code',\n",
       " 'addr_state',\n",
       " 'earliest_cr_line',\n",
       " 'initial_list_status',\n",
       " 'last_pymnt_d',\n",
       " 'next_pymnt_d',\n",
       " 'last_credit_pull_d',\n",
       " 'application_type',\n",
       " 'verification_status_joint',\n",
       " 'sec_app_earliest_cr_line',\n",
       " 'hardship_flag',\n",
       " 'hardship_type',\n",
       " 'hardship_reason',\n",
       " 'hardship_status',\n",
       " 'hardship_start_date',\n",
       " 'hardship_end_date',\n",
       " 'payment_plan_start_date',\n",
       " 'hardship_loan_status',\n",
       " 'disbursement_method',\n",
       " 'debt_settlement_flag',\n",
       " 'debt_settlement_flag_date',\n",
       " 'settlement_status',\n",
       " 'settlement_date']"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[col for col in data.columns.values if data[col].dtype==object]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "82fcd177",
   "metadata": {},
   "outputs": [],
   "source": [
    "cols_to_drop = ['id',\n",
    " 'term',\n",
    " 'sub_grade',\n",
    " 'emp_title',\n",
    " 'issue_d',\n",
    " 'pymnt_plan',\n",
    " 'url',\n",
    " 'desc',\n",
    " 'purpose',\n",
    " 'title',\n",
    " 'zip_code',\n",
    " 'earliest_cr_line',\n",
    " 'initial_list_status',\n",
    " 'last_pymnt_d',\n",
    " 'next_pymnt_d',\n",
    " 'last_credit_pull_d',\n",
    " 'application_type',\n",
    " 'verification_status_joint',\n",
    " 'sec_app_earliest_cr_line',\n",
    " 'hardship_flag',\n",
    " 'hardship_type',\n",
    " 'hardship_reason',\n",
    " 'hardship_status',\n",
    " 'hardship_start_date',\n",
    " 'hardship_end_date',\n",
    " 'payment_plan_start_date',\n",
    " 'hardship_loan_status',\n",
    " 'disbursement_method',\n",
    " 'debt_settlement_flag',\n",
    " 'debt_settlement_flag_date',\n",
    " 'settlement_status',\n",
    " 'settlement_date']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "9d7e7f2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.drop(columns=cols_to_drop, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "f6271579",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>member_id</th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>verification_status</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>addr_state</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>fico_range_low</th>\n",
       "      <th>fico_range_high</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>mths_since_last_delinq</th>\n",
       "      <th>mths_since_last_record</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>revol_util</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>out_prncp</th>\n",
       "      <th>out_prncp_inv</th>\n",
       "      <th>total_pymnt</th>\n",
       "      <th>total_pymnt_inv</th>\n",
       "      <th>total_rec_prncp</th>\n",
       "      <th>total_rec_int</th>\n",
       "      <th>total_rec_late_fee</th>\n",
       "      <th>recoveries</th>\n",
       "      <th>collection_recovery_fee</th>\n",
       "      <th>last_pymnt_amnt</th>\n",
       "      <th>last_fico_range_high</th>\n",
       "      <th>last_fico_range_low</th>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <th>mths_since_last_major_derog</th>\n",
       "      <th>policy_code</th>\n",
       "      <th>annual_inc_joint</th>\n",
       "      <th>dti_joint</th>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <th>open_acc_6m</th>\n",
       "      <th>open_act_il</th>\n",
       "      <th>open_il_12m</th>\n",
       "      <th>open_il_24m</th>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <th>total_bal_il</th>\n",
       "      <th>il_util</th>\n",
       "      <th>open_rv_12m</th>\n",
       "      <th>open_rv_24m</th>\n",
       "      <th>max_bal_bc</th>\n",
       "      <th>all_util</th>\n",
       "      <th>total_rev_hi_lim</th>\n",
       "      <th>inq_fi</th>\n",
       "      <th>total_cu_tl</th>\n",
       "      <th>inq_last_12m</th>\n",
       "      <th>acc_open_past_24mths</th>\n",
       "      <th>avg_cur_bal</th>\n",
       "      <th>bc_open_to_buy</th>\n",
       "      <th>bc_util</th>\n",
       "      <th>chargeoff_within_12_mths</th>\n",
       "      <th>delinq_amnt</th>\n",
       "      <th>mo_sin_old_il_acct</th>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <th>mort_acc</th>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <th>mths_since_recent_bc_dlq</th>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <th>mths_since_recent_revol_delinq</th>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <th>num_bc_sats</th>\n",
       "      <th>num_bc_tl</th>\n",
       "      <th>num_il_tl</th>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <th>num_rev_accts</th>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <th>num_sats</th>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <th>tax_liens</th>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <th>total_bc_limit</th>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "      <th>revol_bal_joint</th>\n",
       "      <th>sec_app_fico_range_low</th>\n",
       "      <th>sec_app_fico_range_high</th>\n",
       "      <th>sec_app_inq_last_6mths</th>\n",
       "      <th>sec_app_mort_acc</th>\n",
       "      <th>sec_app_open_acc</th>\n",
       "      <th>sec_app_revol_util</th>\n",
       "      <th>sec_app_open_act_il</th>\n",
       "      <th>sec_app_num_rev_accts</th>\n",
       "      <th>sec_app_chargeoff_within_12_mths</th>\n",
       "      <th>sec_app_collections_12_mths_ex_med</th>\n",
       "      <th>sec_app_mths_since_last_major_derog</th>\n",
       "      <th>deferral_term</th>\n",
       "      <th>hardship_amount</th>\n",
       "      <th>hardship_length</th>\n",
       "      <th>hardship_dpd</th>\n",
       "      <th>orig_projected_additional_accrued_interest</th>\n",
       "      <th>hardship_payoff_balance_amount</th>\n",
       "      <th>hardship_last_payment_amount</th>\n",
       "      <th>settlement_amount</th>\n",
       "      <th>settlement_percentage</th>\n",
       "      <th>settlement_term</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>13.99</td>\n",
       "      <td>123.03</td>\n",
       "      <td>C</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>PA</td>\n",
       "      <td>5.91</td>\n",
       "      <td>0.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2765.0</td>\n",
       "      <td>29.7</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4421.723917</td>\n",
       "      <td>4421.72</td>\n",
       "      <td>3600.00</td>\n",
       "      <td>821.72</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>122.67</td>\n",
       "      <td>564.0</td>\n",
       "      <td>560.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>144904.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4981.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>9300.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20701.0</td>\n",
       "      <td>1506.0</td>\n",
       "      <td>37.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>76.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>178050.0</td>\n",
       "      <td>7746.0</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>13734.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>11.99</td>\n",
       "      <td>820.28</td>\n",
       "      <td>C</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>SD</td>\n",
       "      <td>16.06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21470.0</td>\n",
       "      <td>19.2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>25679.660000</td>\n",
       "      <td>25679.66</td>\n",
       "      <td>24700.00</td>\n",
       "      <td>979.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>926.35</td>\n",
       "      <td>699.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>204396.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>18005.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6472.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>111800.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9733.0</td>\n",
       "      <td>57830.0</td>\n",
       "      <td>27.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>192.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>7.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>314017.0</td>\n",
       "      <td>39475.0</td>\n",
       "      <td>79300.0</td>\n",
       "      <td>24667.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>10.78</td>\n",
       "      <td>432.66</td>\n",
       "      <td>B</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>63000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>IL</td>\n",
       "      <td>10.78</td>\n",
       "      <td>0.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7869.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>22705.924294</td>\n",
       "      <td>22705.92</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>2705.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15813.30</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>71000.0</td>\n",
       "      <td>13.85</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>189699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>10827.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2081.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>31617.0</td>\n",
       "      <td>2737.0</td>\n",
       "      <td>55.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>184.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>218418.0</td>\n",
       "      <td>18696.0</td>\n",
       "      <td>6200.0</td>\n",
       "      <td>14877.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>14.85</td>\n",
       "      <td>829.90</td>\n",
       "      <td>C</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Current</td>\n",
       "      <td>NJ</td>\n",
       "      <td>17.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>785.0</td>\n",
       "      <td>789.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7802.0</td>\n",
       "      <td>11.6</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>31464.010000</td>\n",
       "      <td>31464.01</td>\n",
       "      <td>19102.35</td>\n",
       "      <td>12361.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>829.90</td>\n",
       "      <td>679.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>301500.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>12609.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6987.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>67300.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>23192.0</td>\n",
       "      <td>54962.0</td>\n",
       "      <td>12.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>381215.0</td>\n",
       "      <td>52226.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>18000.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>22.45</td>\n",
       "      <td>289.91</td>\n",
       "      <td>F</td>\n",
       "      <td>3 years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>104433.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>PA</td>\n",
       "      <td>25.37</td>\n",
       "      <td>1.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21929.0</td>\n",
       "      <td>64.5</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11740.500000</td>\n",
       "      <td>11740.50</td>\n",
       "      <td>10400.00</td>\n",
       "      <td>1340.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10128.96</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>331730.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>73839.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9702.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>34000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>27644.0</td>\n",
       "      <td>4567.0</td>\n",
       "      <td>77.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>439570.0</td>\n",
       "      <td>95768.0</td>\n",
       "      <td>20300.0</td>\n",
       "      <td>88097.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   member_id  loan_amnt  funded_amnt  funded_amnt_inv  int_rate  installment  \\\n",
       "0        NaN     3600.0       3600.0           3600.0     13.99       123.03   \n",
       "1        NaN    24700.0      24700.0          24700.0     11.99       820.28   \n",
       "2        NaN    20000.0      20000.0          20000.0     10.78       432.66   \n",
       "3        NaN    35000.0      35000.0          35000.0     14.85       829.90   \n",
       "4        NaN    10400.0      10400.0          10400.0     22.45       289.91   \n",
       "\n",
       "  grade emp_length home_ownership  annual_inc verification_status loan_status  \\\n",
       "0     C  10+ years       MORTGAGE     55000.0        Not Verified  Fully Paid   \n",
       "1     C  10+ years       MORTGAGE     65000.0        Not Verified  Fully Paid   \n",
       "2     B  10+ years       MORTGAGE     63000.0        Not Verified  Fully Paid   \n",
       "3     C  10+ years       MORTGAGE    110000.0     Source Verified     Current   \n",
       "4     F    3 years       MORTGAGE    104433.0     Source Verified  Fully Paid   \n",
       "\n",
       "  addr_state    dti  delinq_2yrs  fico_range_low  fico_range_high  \\\n",
       "0         PA   5.91          0.0           675.0            679.0   \n",
       "1         SD  16.06          1.0           715.0            719.0   \n",
       "2         IL  10.78          0.0           695.0            699.0   \n",
       "3         NJ  17.06          0.0           785.0            789.0   \n",
       "4         PA  25.37          1.0           695.0            699.0   \n",
       "\n",
       "   inq_last_6mths  mths_since_last_delinq  mths_since_last_record  open_acc  \\\n",
       "0             1.0                    30.0                     NaN       7.0   \n",
       "1             4.0                     6.0                     NaN      22.0   \n",
       "2             0.0                     NaN                     NaN       6.0   \n",
       "3             0.0                     NaN                     NaN      13.0   \n",
       "4             3.0                    12.0                     NaN      12.0   \n",
       "\n",
       "   pub_rec  revol_bal  revol_util  total_acc  out_prncp  out_prncp_inv  \\\n",
       "0      0.0     2765.0        29.7       13.0       0.00           0.00   \n",
       "1      0.0    21470.0        19.2       38.0       0.00           0.00   \n",
       "2      0.0     7869.0        56.2       18.0       0.00           0.00   \n",
       "3      0.0     7802.0        11.6       17.0   15897.65       15897.65   \n",
       "4      0.0    21929.0        64.5       35.0       0.00           0.00   \n",
       "\n",
       "    total_pymnt  total_pymnt_inv  total_rec_prncp  total_rec_int  \\\n",
       "0   4421.723917          4421.72          3600.00         821.72   \n",
       "1  25679.660000         25679.66         24700.00         979.66   \n",
       "2  22705.924294         22705.92         20000.00        2705.92   \n",
       "3  31464.010000         31464.01         19102.35       12361.66   \n",
       "4  11740.500000         11740.50         10400.00        1340.50   \n",
       "\n",
       "   total_rec_late_fee  recoveries  collection_recovery_fee  last_pymnt_amnt  \\\n",
       "0                 0.0         0.0                      0.0           122.67   \n",
       "1                 0.0         0.0                      0.0           926.35   \n",
       "2                 0.0         0.0                      0.0         15813.30   \n",
       "3                 0.0         0.0                      0.0           829.90   \n",
       "4                 0.0         0.0                      0.0         10128.96   \n",
       "\n",
       "   last_fico_range_high  last_fico_range_low  collections_12_mths_ex_med  \\\n",
       "0                 564.0                560.0                         0.0   \n",
       "1                 699.0                695.0                         0.0   \n",
       "2                 704.0                700.0                         0.0   \n",
       "3                 679.0                675.0                         0.0   \n",
       "4                 704.0                700.0                         0.0   \n",
       "\n",
       "   mths_since_last_major_derog  policy_code  annual_inc_joint  dti_joint  \\\n",
       "0                         30.0          1.0               NaN        NaN   \n",
       "1                          NaN          1.0               NaN        NaN   \n",
       "2                          NaN          1.0           71000.0      13.85   \n",
       "3                          NaN          1.0               NaN        NaN   \n",
       "4                          NaN          1.0               NaN        NaN   \n",
       "\n",
       "   acc_now_delinq  tot_coll_amt  tot_cur_bal  open_acc_6m  open_act_il  \\\n",
       "0             0.0         722.0     144904.0          2.0          2.0   \n",
       "1             0.0           0.0     204396.0          1.0          1.0   \n",
       "2             0.0           0.0     189699.0          0.0          1.0   \n",
       "3             0.0           0.0     301500.0          1.0          1.0   \n",
       "4             0.0           0.0     331730.0          1.0          3.0   \n",
       "\n",
       "   open_il_12m  open_il_24m  mths_since_rcnt_il  total_bal_il  il_util  \\\n",
       "0          0.0          1.0                21.0        4981.0     36.0   \n",
       "1          0.0          1.0                19.0       18005.0     73.0   \n",
       "2          0.0          4.0                19.0       10827.0     73.0   \n",
       "3          0.0          1.0                23.0       12609.0     70.0   \n",
       "4          0.0          3.0                14.0       73839.0     84.0   \n",
       "\n",
       "   open_rv_12m  open_rv_24m  max_bal_bc  all_util  total_rev_hi_lim  inq_fi  \\\n",
       "0          3.0          3.0       722.0      34.0            9300.0     3.0   \n",
       "1          2.0          3.0      6472.0      29.0          111800.0     0.0   \n",
       "2          0.0          2.0      2081.0      65.0           14000.0     2.0   \n",
       "3          1.0          1.0      6987.0      45.0           67300.0     0.0   \n",
       "4          4.0          7.0      9702.0      78.0           34000.0     2.0   \n",
       "\n",
       "   total_cu_tl  inq_last_12m  acc_open_past_24mths  avg_cur_bal  \\\n",
       "0          1.0           4.0                   4.0      20701.0   \n",
       "1          0.0           6.0                   4.0       9733.0   \n",
       "2          5.0           1.0                   6.0      31617.0   \n",
       "3          1.0           0.0                   2.0      23192.0   \n",
       "4          1.0           3.0                  10.0      27644.0   \n",
       "\n",
       "   bc_open_to_buy  bc_util  chargeoff_within_12_mths  delinq_amnt  \\\n",
       "0          1506.0     37.2                       0.0          0.0   \n",
       "1         57830.0     27.1                       0.0          0.0   \n",
       "2          2737.0     55.9                       0.0          0.0   \n",
       "3         54962.0     12.1                       0.0          0.0   \n",
       "4          4567.0     77.5                       0.0          0.0   \n",
       "\n",
       "   mo_sin_old_il_acct  mo_sin_old_rev_tl_op  mo_sin_rcnt_rev_tl_op  \\\n",
       "0               148.0                 128.0                    3.0   \n",
       "1               113.0                 192.0                    2.0   \n",
       "2               125.0                 184.0                   14.0   \n",
       "3                36.0                  87.0                    2.0   \n",
       "4               128.0                 210.0                    4.0   \n",
       "\n",
       "   mo_sin_rcnt_tl  mort_acc  mths_since_recent_bc  mths_since_recent_bc_dlq  \\\n",
       "0             3.0       1.0                   4.0                      69.0   \n",
       "1             2.0       4.0                   2.0                       NaN   \n",
       "2            14.0       5.0                 101.0                       NaN   \n",
       "3             2.0       1.0                   2.0                       NaN   \n",
       "4             4.0       6.0                   4.0                      12.0   \n",
       "\n",
       "   mths_since_recent_inq  mths_since_recent_revol_delinq  \\\n",
       "0                    4.0                            69.0   \n",
       "1                    0.0                             6.0   \n",
       "2                   10.0                             NaN   \n",
       "3                    NaN                             NaN   \n",
       "4                    1.0                            12.0   \n",
       "\n",
       "   num_accts_ever_120_pd  num_actv_bc_tl  num_actv_rev_tl  num_bc_sats  \\\n",
       "0                    2.0             2.0              4.0          2.0   \n",
       "1                    0.0             5.0              5.0         13.0   \n",
       "2                    0.0             2.0              3.0          2.0   \n",
       "3                    0.0             4.0              5.0          8.0   \n",
       "4                    0.0             4.0              6.0          5.0   \n",
       "\n",
       "   num_bc_tl  num_il_tl  num_op_rev_tl  num_rev_accts  num_rev_tl_bal_gt_0  \\\n",
       "0        5.0        3.0            4.0            9.0                  4.0   \n",
       "1       17.0        6.0           20.0           27.0                  5.0   \n",
       "2        4.0        6.0            4.0            7.0                  3.0   \n",
       "3       10.0        2.0           10.0           13.0                  5.0   \n",
       "4        9.0       10.0            7.0           19.0                  6.0   \n",
       "\n",
       "   num_sats  num_tl_120dpd_2m  num_tl_30dpd  num_tl_90g_dpd_24m  \\\n",
       "0       7.0               0.0           0.0                 0.0   \n",
       "1      22.0               0.0           0.0                 0.0   \n",
       "2       6.0               0.0           0.0                 0.0   \n",
       "3      13.0               0.0           0.0                 0.0   \n",
       "4      12.0               0.0           0.0                 0.0   \n",
       "\n",
       "   num_tl_op_past_12m  pct_tl_nvr_dlq  percent_bc_gt_75  pub_rec_bankruptcies  \\\n",
       "0                 3.0            76.9               0.0                   0.0   \n",
       "1                 2.0            97.4               7.7                   0.0   \n",
       "2                 0.0           100.0              50.0                   0.0   \n",
       "3                 1.0           100.0               0.0                   0.0   \n",
       "4                 4.0            96.6              60.0                   0.0   \n",
       "\n",
       "   tax_liens  tot_hi_cred_lim  total_bal_ex_mort  total_bc_limit  \\\n",
       "0        0.0         178050.0             7746.0          2400.0   \n",
       "1        0.0         314017.0            39475.0         79300.0   \n",
       "2        0.0         218418.0            18696.0          6200.0   \n",
       "3        0.0         381215.0            52226.0         62500.0   \n",
       "4        0.0         439570.0            95768.0         20300.0   \n",
       "\n",
       "   total_il_high_credit_limit  revol_bal_joint  sec_app_fico_range_low  \\\n",
       "0                     13734.0              NaN                     NaN   \n",
       "1                     24667.0              NaN                     NaN   \n",
       "2                     14877.0              NaN                     NaN   \n",
       "3                     18000.0              NaN                     NaN   \n",
       "4                     88097.0              NaN                     NaN   \n",
       "\n",
       "   sec_app_fico_range_high  sec_app_inq_last_6mths  sec_app_mort_acc  \\\n",
       "0                      NaN                     NaN               NaN   \n",
       "1                      NaN                     NaN               NaN   \n",
       "2                      NaN                     NaN               NaN   \n",
       "3                      NaN                     NaN               NaN   \n",
       "4                      NaN                     NaN               NaN   \n",
       "\n",
       "   sec_app_open_acc  sec_app_revol_util  sec_app_open_act_il  \\\n",
       "0               NaN                 NaN                  NaN   \n",
       "1               NaN                 NaN                  NaN   \n",
       "2               NaN                 NaN                  NaN   \n",
       "3               NaN                 NaN                  NaN   \n",
       "4               NaN                 NaN                  NaN   \n",
       "\n",
       "   sec_app_num_rev_accts  sec_app_chargeoff_within_12_mths  \\\n",
       "0                    NaN                               NaN   \n",
       "1                    NaN                               NaN   \n",
       "2                    NaN                               NaN   \n",
       "3                    NaN                               NaN   \n",
       "4                    NaN                               NaN   \n",
       "\n",
       "   sec_app_collections_12_mths_ex_med  sec_app_mths_since_last_major_derog  \\\n",
       "0                                 NaN                                  NaN   \n",
       "1                                 NaN                                  NaN   \n",
       "2                                 NaN                                  NaN   \n",
       "3                                 NaN                                  NaN   \n",
       "4                                 NaN                                  NaN   \n",
       "\n",
       "   deferral_term  hardship_amount  hardship_length  hardship_dpd  \\\n",
       "0            NaN              NaN              NaN           NaN   \n",
       "1            NaN              NaN              NaN           NaN   \n",
       "2            NaN              NaN              NaN           NaN   \n",
       "3            NaN              NaN              NaN           NaN   \n",
       "4            NaN              NaN              NaN           NaN   \n",
       "\n",
       "   orig_projected_additional_accrued_interest  hardship_payoff_balance_amount  \\\n",
       "0                                         NaN                             NaN   \n",
       "1                                         NaN                             NaN   \n",
       "2                                         NaN                             NaN   \n",
       "3                                         NaN                             NaN   \n",
       "4                                         NaN                             NaN   \n",
       "\n",
       "   hardship_last_payment_amount  settlement_amount  settlement_percentage  \\\n",
       "0                           NaN                NaN                    NaN   \n",
       "1                           NaN                NaN                    NaN   \n",
       "2                           NaN                NaN                    NaN   \n",
       "3                           NaN                NaN                    NaN   \n",
       "4                           NaN                NaN                    NaN   \n",
       "\n",
       "   settlement_term  \n",
       "0              NaN  \n",
       "1              NaN  \n",
       "2              NaN  \n",
       "3              NaN  \n",
       "4              NaN  "
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "35420511",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2260701, 119)"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "af565943",
   "metadata": {},
   "source": [
    "### 删除空值比较多的列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "063f0277",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['member_id', 'mths_since_last_delinq', 'mths_since_last_record',\n",
       "       'mths_since_last_major_derog', 'annual_inc_joint', 'dti_joint',\n",
       "       'mths_since_recent_bc_dlq', 'mths_since_recent_revol_delinq',\n",
       "       'revol_bal_joint', 'sec_app_fico_range_low', 'sec_app_fico_range_high',\n",
       "       'sec_app_inq_last_6mths', 'sec_app_mort_acc', 'sec_app_open_acc',\n",
       "       'sec_app_revol_util', 'sec_app_open_act_il', 'sec_app_num_rev_accts',\n",
       "       'sec_app_chargeoff_within_12_mths',\n",
       "       'sec_app_collections_12_mths_ex_med',\n",
       "       'sec_app_mths_since_last_major_derog', 'deferral_term',\n",
       "       'hardship_amount', 'hardship_length', 'hardship_dpd',\n",
       "       'orig_projected_additional_accrued_interest',\n",
       "       'hardship_payoff_balance_amount', 'hardship_last_payment_amount',\n",
       "       'settlement_amount', 'settlement_percentage', 'settlement_term'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drop_nan = data.isnull().sum()\n",
    "drop_nan = drop_nan[drop_nan.values > (len(data) * 0.50)]\n",
    "drop_nan_dun = drop_nan.index\n",
    "drop_nan_dun"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "id": "239adb25",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.drop(labels=drop_nan_dun, inplace=True, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "561768b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>verification_status</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>addr_state</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>fico_range_low</th>\n",
       "      <th>fico_range_high</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>revol_util</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>out_prncp</th>\n",
       "      <th>out_prncp_inv</th>\n",
       "      <th>total_pymnt</th>\n",
       "      <th>total_pymnt_inv</th>\n",
       "      <th>total_rec_prncp</th>\n",
       "      <th>total_rec_int</th>\n",
       "      <th>total_rec_late_fee</th>\n",
       "      <th>recoveries</th>\n",
       "      <th>collection_recovery_fee</th>\n",
       "      <th>last_pymnt_amnt</th>\n",
       "      <th>last_fico_range_high</th>\n",
       "      <th>last_fico_range_low</th>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <th>policy_code</th>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <th>open_acc_6m</th>\n",
       "      <th>open_act_il</th>\n",
       "      <th>open_il_12m</th>\n",
       "      <th>open_il_24m</th>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <th>total_bal_il</th>\n",
       "      <th>il_util</th>\n",
       "      <th>open_rv_12m</th>\n",
       "      <th>open_rv_24m</th>\n",
       "      <th>max_bal_bc</th>\n",
       "      <th>all_util</th>\n",
       "      <th>total_rev_hi_lim</th>\n",
       "      <th>inq_fi</th>\n",
       "      <th>total_cu_tl</th>\n",
       "      <th>inq_last_12m</th>\n",
       "      <th>acc_open_past_24mths</th>\n",
       "      <th>avg_cur_bal</th>\n",
       "      <th>bc_open_to_buy</th>\n",
       "      <th>bc_util</th>\n",
       "      <th>chargeoff_within_12_mths</th>\n",
       "      <th>delinq_amnt</th>\n",
       "      <th>mo_sin_old_il_acct</th>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <th>mort_acc</th>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <th>num_bc_sats</th>\n",
       "      <th>num_bc_tl</th>\n",
       "      <th>num_il_tl</th>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <th>num_rev_accts</th>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <th>num_sats</th>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <th>tax_liens</th>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <th>total_bc_limit</th>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>13.99</td>\n",
       "      <td>123.03</td>\n",
       "      <td>C</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>PA</td>\n",
       "      <td>5.91</td>\n",
       "      <td>0.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2765.0</td>\n",
       "      <td>29.7</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4421.723917</td>\n",
       "      <td>4421.72</td>\n",
       "      <td>3600.00</td>\n",
       "      <td>821.72</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>122.67</td>\n",
       "      <td>564.0</td>\n",
       "      <td>560.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>144904.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4981.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>9300.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20701.0</td>\n",
       "      <td>1506.0</td>\n",
       "      <td>37.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>76.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>178050.0</td>\n",
       "      <td>7746.0</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>13734.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>11.99</td>\n",
       "      <td>820.28</td>\n",
       "      <td>C</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>SD</td>\n",
       "      <td>16.06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21470.0</td>\n",
       "      <td>19.2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>25679.660000</td>\n",
       "      <td>25679.66</td>\n",
       "      <td>24700.00</td>\n",
       "      <td>979.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>926.35</td>\n",
       "      <td>699.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>204396.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>18005.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6472.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>111800.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9733.0</td>\n",
       "      <td>57830.0</td>\n",
       "      <td>27.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>192.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>7.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>314017.0</td>\n",
       "      <td>39475.0</td>\n",
       "      <td>79300.0</td>\n",
       "      <td>24667.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>10.78</td>\n",
       "      <td>432.66</td>\n",
       "      <td>B</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>63000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>IL</td>\n",
       "      <td>10.78</td>\n",
       "      <td>0.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7869.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>22705.924294</td>\n",
       "      <td>22705.92</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>2705.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15813.30</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>189699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>10827.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2081.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>31617.0</td>\n",
       "      <td>2737.0</td>\n",
       "      <td>55.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>184.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>218418.0</td>\n",
       "      <td>18696.0</td>\n",
       "      <td>6200.0</td>\n",
       "      <td>14877.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>14.85</td>\n",
       "      <td>829.90</td>\n",
       "      <td>C</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Current</td>\n",
       "      <td>NJ</td>\n",
       "      <td>17.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>785.0</td>\n",
       "      <td>789.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7802.0</td>\n",
       "      <td>11.6</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>31464.010000</td>\n",
       "      <td>31464.01</td>\n",
       "      <td>19102.35</td>\n",
       "      <td>12361.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>829.90</td>\n",
       "      <td>679.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>301500.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>12609.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6987.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>67300.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>23192.0</td>\n",
       "      <td>54962.0</td>\n",
       "      <td>12.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>381215.0</td>\n",
       "      <td>52226.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>18000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>22.45</td>\n",
       "      <td>289.91</td>\n",
       "      <td>F</td>\n",
       "      <td>3 years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>104433.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>PA</td>\n",
       "      <td>25.37</td>\n",
       "      <td>1.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21929.0</td>\n",
       "      <td>64.5</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11740.500000</td>\n",
       "      <td>11740.50</td>\n",
       "      <td>10400.00</td>\n",
       "      <td>1340.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10128.96</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>331730.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>73839.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9702.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>34000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>27644.0</td>\n",
       "      <td>4567.0</td>\n",
       "      <td>77.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>439570.0</td>\n",
       "      <td>95768.0</td>\n",
       "      <td>20300.0</td>\n",
       "      <td>88097.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  funded_amnt  funded_amnt_inv  int_rate  installment grade  \\\n",
       "0     3600.0       3600.0           3600.0     13.99       123.03     C   \n",
       "1    24700.0      24700.0          24700.0     11.99       820.28     C   \n",
       "2    20000.0      20000.0          20000.0     10.78       432.66     B   \n",
       "3    35000.0      35000.0          35000.0     14.85       829.90     C   \n",
       "4    10400.0      10400.0          10400.0     22.45       289.91     F   \n",
       "\n",
       "  emp_length home_ownership  annual_inc verification_status loan_status  \\\n",
       "0  10+ years       MORTGAGE     55000.0        Not Verified  Fully Paid   \n",
       "1  10+ years       MORTGAGE     65000.0        Not Verified  Fully Paid   \n",
       "2  10+ years       MORTGAGE     63000.0        Not Verified  Fully Paid   \n",
       "3  10+ years       MORTGAGE    110000.0     Source Verified     Current   \n",
       "4    3 years       MORTGAGE    104433.0     Source Verified  Fully Paid   \n",
       "\n",
       "  addr_state    dti  delinq_2yrs  fico_range_low  fico_range_high  \\\n",
       "0         PA   5.91          0.0           675.0            679.0   \n",
       "1         SD  16.06          1.0           715.0            719.0   \n",
       "2         IL  10.78          0.0           695.0            699.0   \n",
       "3         NJ  17.06          0.0           785.0            789.0   \n",
       "4         PA  25.37          1.0           695.0            699.0   \n",
       "\n",
       "   inq_last_6mths  open_acc  pub_rec  revol_bal  revol_util  total_acc  \\\n",
       "0             1.0       7.0      0.0     2765.0        29.7       13.0   \n",
       "1             4.0      22.0      0.0    21470.0        19.2       38.0   \n",
       "2             0.0       6.0      0.0     7869.0        56.2       18.0   \n",
       "3             0.0      13.0      0.0     7802.0        11.6       17.0   \n",
       "4             3.0      12.0      0.0    21929.0        64.5       35.0   \n",
       "\n",
       "   out_prncp  out_prncp_inv   total_pymnt  total_pymnt_inv  total_rec_prncp  \\\n",
       "0       0.00           0.00   4421.723917          4421.72          3600.00   \n",
       "1       0.00           0.00  25679.660000         25679.66         24700.00   \n",
       "2       0.00           0.00  22705.924294         22705.92         20000.00   \n",
       "3   15897.65       15897.65  31464.010000         31464.01         19102.35   \n",
       "4       0.00           0.00  11740.500000         11740.50         10400.00   \n",
       "\n",
       "   total_rec_int  total_rec_late_fee  recoveries  collection_recovery_fee  \\\n",
       "0         821.72                 0.0         0.0                      0.0   \n",
       "1         979.66                 0.0         0.0                      0.0   \n",
       "2        2705.92                 0.0         0.0                      0.0   \n",
       "3       12361.66                 0.0         0.0                      0.0   \n",
       "4        1340.50                 0.0         0.0                      0.0   \n",
       "\n",
       "   last_pymnt_amnt  last_fico_range_high  last_fico_range_low  \\\n",
       "0           122.67                 564.0                560.0   \n",
       "1           926.35                 699.0                695.0   \n",
       "2         15813.30                 704.0                700.0   \n",
       "3           829.90                 679.0                675.0   \n",
       "4         10128.96                 704.0                700.0   \n",
       "\n",
       "   collections_12_mths_ex_med  policy_code  acc_now_delinq  tot_coll_amt  \\\n",
       "0                         0.0          1.0             0.0         722.0   \n",
       "1                         0.0          1.0             0.0           0.0   \n",
       "2                         0.0          1.0             0.0           0.0   \n",
       "3                         0.0          1.0             0.0           0.0   \n",
       "4                         0.0          1.0             0.0           0.0   \n",
       "\n",
       "   tot_cur_bal  open_acc_6m  open_act_il  open_il_12m  open_il_24m  \\\n",
       "0     144904.0          2.0          2.0          0.0          1.0   \n",
       "1     204396.0          1.0          1.0          0.0          1.0   \n",
       "2     189699.0          0.0          1.0          0.0          4.0   \n",
       "3     301500.0          1.0          1.0          0.0          1.0   \n",
       "4     331730.0          1.0          3.0          0.0          3.0   \n",
       "\n",
       "   mths_since_rcnt_il  total_bal_il  il_util  open_rv_12m  open_rv_24m  \\\n",
       "0                21.0        4981.0     36.0          3.0          3.0   \n",
       "1                19.0       18005.0     73.0          2.0          3.0   \n",
       "2                19.0       10827.0     73.0          0.0          2.0   \n",
       "3                23.0       12609.0     70.0          1.0          1.0   \n",
       "4                14.0       73839.0     84.0          4.0          7.0   \n",
       "\n",
       "   max_bal_bc  all_util  total_rev_hi_lim  inq_fi  total_cu_tl  inq_last_12m  \\\n",
       "0       722.0      34.0            9300.0     3.0          1.0           4.0   \n",
       "1      6472.0      29.0          111800.0     0.0          0.0           6.0   \n",
       "2      2081.0      65.0           14000.0     2.0          5.0           1.0   \n",
       "3      6987.0      45.0           67300.0     0.0          1.0           0.0   \n",
       "4      9702.0      78.0           34000.0     2.0          1.0           3.0   \n",
       "\n",
       "   acc_open_past_24mths  avg_cur_bal  bc_open_to_buy  bc_util  \\\n",
       "0                   4.0      20701.0          1506.0     37.2   \n",
       "1                   4.0       9733.0         57830.0     27.1   \n",
       "2                   6.0      31617.0          2737.0     55.9   \n",
       "3                   2.0      23192.0         54962.0     12.1   \n",
       "4                  10.0      27644.0          4567.0     77.5   \n",
       "\n",
       "   chargeoff_within_12_mths  delinq_amnt  mo_sin_old_il_acct  \\\n",
       "0                       0.0          0.0               148.0   \n",
       "1                       0.0          0.0               113.0   \n",
       "2                       0.0          0.0               125.0   \n",
       "3                       0.0          0.0                36.0   \n",
       "4                       0.0          0.0               128.0   \n",
       "\n",
       "   mo_sin_old_rev_tl_op  mo_sin_rcnt_rev_tl_op  mo_sin_rcnt_tl  mort_acc  \\\n",
       "0                 128.0                    3.0             3.0       1.0   \n",
       "1                 192.0                    2.0             2.0       4.0   \n",
       "2                 184.0                   14.0            14.0       5.0   \n",
       "3                  87.0                    2.0             2.0       1.0   \n",
       "4                 210.0                    4.0             4.0       6.0   \n",
       "\n",
       "   mths_since_recent_bc  mths_since_recent_inq  num_accts_ever_120_pd  \\\n",
       "0                   4.0                    4.0                    2.0   \n",
       "1                   2.0                    0.0                    0.0   \n",
       "2                 101.0                   10.0                    0.0   \n",
       "3                   2.0                    NaN                    0.0   \n",
       "4                   4.0                    1.0                    0.0   \n",
       "\n",
       "   num_actv_bc_tl  num_actv_rev_tl  num_bc_sats  num_bc_tl  num_il_tl  \\\n",
       "0             2.0              4.0          2.0        5.0        3.0   \n",
       "1             5.0              5.0         13.0       17.0        6.0   \n",
       "2             2.0              3.0          2.0        4.0        6.0   \n",
       "3             4.0              5.0          8.0       10.0        2.0   \n",
       "4             4.0              6.0          5.0        9.0       10.0   \n",
       "\n",
       "   num_op_rev_tl  num_rev_accts  num_rev_tl_bal_gt_0  num_sats  \\\n",
       "0            4.0            9.0                  4.0       7.0   \n",
       "1           20.0           27.0                  5.0      22.0   \n",
       "2            4.0            7.0                  3.0       6.0   \n",
       "3           10.0           13.0                  5.0      13.0   \n",
       "4            7.0           19.0                  6.0      12.0   \n",
       "\n",
       "   num_tl_120dpd_2m  num_tl_30dpd  num_tl_90g_dpd_24m  num_tl_op_past_12m  \\\n",
       "0               0.0           0.0                 0.0                 3.0   \n",
       "1               0.0           0.0                 0.0                 2.0   \n",
       "2               0.0           0.0                 0.0                 0.0   \n",
       "3               0.0           0.0                 0.0                 1.0   \n",
       "4               0.0           0.0                 0.0                 4.0   \n",
       "\n",
       "   pct_tl_nvr_dlq  percent_bc_gt_75  pub_rec_bankruptcies  tax_liens  \\\n",
       "0            76.9               0.0                   0.0        0.0   \n",
       "1            97.4               7.7                   0.0        0.0   \n",
       "2           100.0              50.0                   0.0        0.0   \n",
       "3           100.0               0.0                   0.0        0.0   \n",
       "4            96.6              60.0                   0.0        0.0   \n",
       "\n",
       "   tot_hi_cred_lim  total_bal_ex_mort  total_bc_limit  \\\n",
       "0         178050.0             7746.0          2400.0   \n",
       "1         314017.0            39475.0         79300.0   \n",
       "2         218418.0            18696.0          6200.0   \n",
       "3         381215.0            52226.0         62500.0   \n",
       "4         439570.0            95768.0         20300.0   \n",
       "\n",
       "   total_il_high_credit_limit  \n",
       "0                     13734.0  \n",
       "1                     24667.0  \n",
       "2                     14877.0  \n",
       "3                     18000.0  \n",
       "4                     88097.0  "
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "id": "8578c3e5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2260701, 89)"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fa2641e3",
   "metadata": {},
   "source": [
    "### 删除只有唯一值的列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "b515ed50",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['policy_code']"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "original_columns = data.columns\n",
    "drop_columns = []\n",
    "for col in original_columns:\n",
    "    col_series = data[col].dropna().unique()\n",
    "    if len(col_series) == 1:\n",
    "        drop_columns.append(col)\n",
    "drop_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "fc2e8eb2",
   "metadata": {},
   "outputs": [],
   "source": [
    "if drop_columns:\n",
    "    data.drop(labels=drop_columns, inplace=True, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ff5a768",
   "metadata": {},
   "source": [
    "## 数据处理"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "344ae581",
   "metadata": {},
   "source": [
    "### 标签处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "522385e0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "B    663557\n",
       "C    650053\n",
       "A    433027\n",
       "D    324424\n",
       "E    135639\n",
       "F     41800\n",
       "G     12168\n",
       "Name: grade, dtype: int64"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['grade'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "53d38dbb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 删除grage为空的行\n",
    "data = data[data['grade'].notna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "id": "2096b3ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2260668, 88)"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "28dc1123",
   "metadata": {},
   "outputs": [],
   "source": [
    "# grade E/F/G 合并为一类\n",
    "# A, B, C, D, E 替换为 0, 1, 2 ,3 ,4\n",
    "grade_replace = {\n",
    "    \"grade\": {\n",
    "        \"A\": 0,\n",
    "        \"B\": 1,\n",
    "        \"C\": 2,\n",
    "        \"D\": 3,\n",
    "        \"E\": 4,\n",
    "        \"F\": 4,\n",
    "        \"G\": 4,\n",
    "    }\n",
    "}\n",
    "data = data.replace(grade_replace)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "ad8e1de1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    663557\n",
       "2    650053\n",
       "0    433027\n",
       "3    324424\n",
       "4    189607\n",
       "Name: grade, dtype: int64"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['grade'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5ab15f90",
   "metadata": {},
   "source": [
    "### 离散值处理"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd52203a",
   "metadata": {},
   "source": [
    "#### emp_length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "id": "592e073d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10+ years    748005\n",
       "2 years      203677\n",
       "< 1 year     189988\n",
       "3 years      180753\n",
       "1 year       148403\n",
       "5 years      139698\n",
       "4 years      136605\n",
       "6 years      102628\n",
       "7 years       92695\n",
       "8 years       91914\n",
       "9 years       79395\n",
       "Name: emp_length, dtype: int64"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['emp_length'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "id": "5f01de32",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 处理工作时长 emp_length\n",
    "emp_length_replace = {\n",
    "    \"emp_length\": {\n",
    "        '< 1 year': 0,\n",
    "        '1 year': 1,\n",
    "        '2 years': 2,\n",
    "        '3 years': 3,\n",
    "        '4 years': 4,\n",
    "        '5 years': 5,\n",
    "        '6 years': 6,\n",
    "        '7 years': 7,\n",
    "        '8 years': 8,\n",
    "        '9 years': 9,\n",
    "        '10+ years': 10,      \n",
    "    }\n",
    "}\n",
    "data = data.replace(emp_length_replace)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "id": "49549c17",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.0    748005\n",
       "2.0     203677\n",
       "0.0     189988\n",
       "3.0     180753\n",
       "1.0     148403\n",
       "5.0     139698\n",
       "4.0     136605\n",
       "6.0     102628\n",
       "7.0      92695\n",
       "8.0      91914\n",
       "9.0      79395\n",
       "Name: emp_length, dtype: int64"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['emp_length'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "41c80c78",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.0"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['emp_length'].median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "id": "a145777a",
   "metadata": {},
   "outputs": [],
   "source": [
    "data['emp_length'] = data['emp_length'].fillna(value=6.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "id": "0ad4c768",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.0    748005\n",
       "6.0     249535\n",
       "2.0     203677\n",
       "0.0     189988\n",
       "3.0     180753\n",
       "1.0     148403\n",
       "5.0     139698\n",
       "4.0     136605\n",
       "7.0      92695\n",
       "8.0      91914\n",
       "9.0      79395\n",
       "Name: emp_length, dtype: int64"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['emp_length'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "id": "8ba2979b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>verification_status</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>addr_state</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>fico_range_low</th>\n",
       "      <th>fico_range_high</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>revol_util</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>out_prncp</th>\n",
       "      <th>out_prncp_inv</th>\n",
       "      <th>total_pymnt</th>\n",
       "      <th>total_pymnt_inv</th>\n",
       "      <th>total_rec_prncp</th>\n",
       "      <th>total_rec_int</th>\n",
       "      <th>total_rec_late_fee</th>\n",
       "      <th>recoveries</th>\n",
       "      <th>collection_recovery_fee</th>\n",
       "      <th>last_pymnt_amnt</th>\n",
       "      <th>last_fico_range_high</th>\n",
       "      <th>last_fico_range_low</th>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <th>open_acc_6m</th>\n",
       "      <th>open_act_il</th>\n",
       "      <th>open_il_12m</th>\n",
       "      <th>open_il_24m</th>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <th>total_bal_il</th>\n",
       "      <th>il_util</th>\n",
       "      <th>open_rv_12m</th>\n",
       "      <th>open_rv_24m</th>\n",
       "      <th>max_bal_bc</th>\n",
       "      <th>all_util</th>\n",
       "      <th>total_rev_hi_lim</th>\n",
       "      <th>inq_fi</th>\n",
       "      <th>total_cu_tl</th>\n",
       "      <th>inq_last_12m</th>\n",
       "      <th>acc_open_past_24mths</th>\n",
       "      <th>avg_cur_bal</th>\n",
       "      <th>bc_open_to_buy</th>\n",
       "      <th>bc_util</th>\n",
       "      <th>chargeoff_within_12_mths</th>\n",
       "      <th>delinq_amnt</th>\n",
       "      <th>mo_sin_old_il_acct</th>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <th>mort_acc</th>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <th>num_bc_sats</th>\n",
       "      <th>num_bc_tl</th>\n",
       "      <th>num_il_tl</th>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <th>num_rev_accts</th>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <th>num_sats</th>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <th>tax_liens</th>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <th>total_bc_limit</th>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>13.99</td>\n",
       "      <td>123.03</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>PA</td>\n",
       "      <td>5.91</td>\n",
       "      <td>0.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2765.0</td>\n",
       "      <td>29.7</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4421.723917</td>\n",
       "      <td>4421.72</td>\n",
       "      <td>3600.00</td>\n",
       "      <td>821.72</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>122.67</td>\n",
       "      <td>564.0</td>\n",
       "      <td>560.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>144904.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4981.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>9300.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20701.0</td>\n",
       "      <td>1506.0</td>\n",
       "      <td>37.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>76.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>178050.0</td>\n",
       "      <td>7746.0</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>13734.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>11.99</td>\n",
       "      <td>820.28</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>SD</td>\n",
       "      <td>16.06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21470.0</td>\n",
       "      <td>19.2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>25679.660000</td>\n",
       "      <td>25679.66</td>\n",
       "      <td>24700.00</td>\n",
       "      <td>979.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>926.35</td>\n",
       "      <td>699.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>204396.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>18005.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6472.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>111800.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9733.0</td>\n",
       "      <td>57830.0</td>\n",
       "      <td>27.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>192.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>7.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>314017.0</td>\n",
       "      <td>39475.0</td>\n",
       "      <td>79300.0</td>\n",
       "      <td>24667.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>10.78</td>\n",
       "      <td>432.66</td>\n",
       "      <td>1</td>\n",
       "      <td>10.0</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>63000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>IL</td>\n",
       "      <td>10.78</td>\n",
       "      <td>0.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7869.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>22705.924294</td>\n",
       "      <td>22705.92</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>2705.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15813.30</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>189699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>10827.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2081.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>31617.0</td>\n",
       "      <td>2737.0</td>\n",
       "      <td>55.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>184.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>218418.0</td>\n",
       "      <td>18696.0</td>\n",
       "      <td>6200.0</td>\n",
       "      <td>14877.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>14.85</td>\n",
       "      <td>829.90</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Current</td>\n",
       "      <td>NJ</td>\n",
       "      <td>17.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>785.0</td>\n",
       "      <td>789.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7802.0</td>\n",
       "      <td>11.6</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>31464.010000</td>\n",
       "      <td>31464.01</td>\n",
       "      <td>19102.35</td>\n",
       "      <td>12361.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>829.90</td>\n",
       "      <td>679.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>301500.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>12609.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6987.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>67300.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>23192.0</td>\n",
       "      <td>54962.0</td>\n",
       "      <td>12.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>381215.0</td>\n",
       "      <td>52226.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>18000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>22.45</td>\n",
       "      <td>289.91</td>\n",
       "      <td>4</td>\n",
       "      <td>3.0</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>104433.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>PA</td>\n",
       "      <td>25.37</td>\n",
       "      <td>1.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21929.0</td>\n",
       "      <td>64.5</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11740.500000</td>\n",
       "      <td>11740.50</td>\n",
       "      <td>10400.00</td>\n",
       "      <td>1340.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10128.96</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>331730.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>73839.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9702.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>34000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>27644.0</td>\n",
       "      <td>4567.0</td>\n",
       "      <td>77.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>439570.0</td>\n",
       "      <td>95768.0</td>\n",
       "      <td>20300.0</td>\n",
       "      <td>88097.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  funded_amnt  funded_amnt_inv  int_rate  installment  grade  \\\n",
       "0     3600.0       3600.0           3600.0     13.99       123.03      2   \n",
       "1    24700.0      24700.0          24700.0     11.99       820.28      2   \n",
       "2    20000.0      20000.0          20000.0     10.78       432.66      1   \n",
       "3    35000.0      35000.0          35000.0     14.85       829.90      2   \n",
       "4    10400.0      10400.0          10400.0     22.45       289.91      4   \n",
       "\n",
       "   emp_length home_ownership  annual_inc verification_status loan_status  \\\n",
       "0        10.0       MORTGAGE     55000.0        Not Verified  Fully Paid   \n",
       "1        10.0       MORTGAGE     65000.0        Not Verified  Fully Paid   \n",
       "2        10.0       MORTGAGE     63000.0        Not Verified  Fully Paid   \n",
       "3        10.0       MORTGAGE    110000.0     Source Verified     Current   \n",
       "4         3.0       MORTGAGE    104433.0     Source Verified  Fully Paid   \n",
       "\n",
       "  addr_state    dti  delinq_2yrs  fico_range_low  fico_range_high  \\\n",
       "0         PA   5.91          0.0           675.0            679.0   \n",
       "1         SD  16.06          1.0           715.0            719.0   \n",
       "2         IL  10.78          0.0           695.0            699.0   \n",
       "3         NJ  17.06          0.0           785.0            789.0   \n",
       "4         PA  25.37          1.0           695.0            699.0   \n",
       "\n",
       "   inq_last_6mths  open_acc  pub_rec  revol_bal  revol_util  total_acc  \\\n",
       "0             1.0       7.0      0.0     2765.0        29.7       13.0   \n",
       "1             4.0      22.0      0.0    21470.0        19.2       38.0   \n",
       "2             0.0       6.0      0.0     7869.0        56.2       18.0   \n",
       "3             0.0      13.0      0.0     7802.0        11.6       17.0   \n",
       "4             3.0      12.0      0.0    21929.0        64.5       35.0   \n",
       "\n",
       "   out_prncp  out_prncp_inv   total_pymnt  total_pymnt_inv  total_rec_prncp  \\\n",
       "0       0.00           0.00   4421.723917          4421.72          3600.00   \n",
       "1       0.00           0.00  25679.660000         25679.66         24700.00   \n",
       "2       0.00           0.00  22705.924294         22705.92         20000.00   \n",
       "3   15897.65       15897.65  31464.010000         31464.01         19102.35   \n",
       "4       0.00           0.00  11740.500000         11740.50         10400.00   \n",
       "\n",
       "   total_rec_int  total_rec_late_fee  recoveries  collection_recovery_fee  \\\n",
       "0         821.72                 0.0         0.0                      0.0   \n",
       "1         979.66                 0.0         0.0                      0.0   \n",
       "2        2705.92                 0.0         0.0                      0.0   \n",
       "3       12361.66                 0.0         0.0                      0.0   \n",
       "4        1340.50                 0.0         0.0                      0.0   \n",
       "\n",
       "   last_pymnt_amnt  last_fico_range_high  last_fico_range_low  \\\n",
       "0           122.67                 564.0                560.0   \n",
       "1           926.35                 699.0                695.0   \n",
       "2         15813.30                 704.0                700.0   \n",
       "3           829.90                 679.0                675.0   \n",
       "4         10128.96                 704.0                700.0   \n",
       "\n",
       "   collections_12_mths_ex_med  acc_now_delinq  tot_coll_amt  tot_cur_bal  \\\n",
       "0                         0.0             0.0         722.0     144904.0   \n",
       "1                         0.0             0.0           0.0     204396.0   \n",
       "2                         0.0             0.0           0.0     189699.0   \n",
       "3                         0.0             0.0           0.0     301500.0   \n",
       "4                         0.0             0.0           0.0     331730.0   \n",
       "\n",
       "   open_acc_6m  open_act_il  open_il_12m  open_il_24m  mths_since_rcnt_il  \\\n",
       "0          2.0          2.0          0.0          1.0                21.0   \n",
       "1          1.0          1.0          0.0          1.0                19.0   \n",
       "2          0.0          1.0          0.0          4.0                19.0   \n",
       "3          1.0          1.0          0.0          1.0                23.0   \n",
       "4          1.0          3.0          0.0          3.0                14.0   \n",
       "\n",
       "   total_bal_il  il_util  open_rv_12m  open_rv_24m  max_bal_bc  all_util  \\\n",
       "0        4981.0     36.0          3.0          3.0       722.0      34.0   \n",
       "1       18005.0     73.0          2.0          3.0      6472.0      29.0   \n",
       "2       10827.0     73.0          0.0          2.0      2081.0      65.0   \n",
       "3       12609.0     70.0          1.0          1.0      6987.0      45.0   \n",
       "4       73839.0     84.0          4.0          7.0      9702.0      78.0   \n",
       "\n",
       "   total_rev_hi_lim  inq_fi  total_cu_tl  inq_last_12m  acc_open_past_24mths  \\\n",
       "0            9300.0     3.0          1.0           4.0                   4.0   \n",
       "1          111800.0     0.0          0.0           6.0                   4.0   \n",
       "2           14000.0     2.0          5.0           1.0                   6.0   \n",
       "3           67300.0     0.0          1.0           0.0                   2.0   \n",
       "4           34000.0     2.0          1.0           3.0                  10.0   \n",
       "\n",
       "   avg_cur_bal  bc_open_to_buy  bc_util  chargeoff_within_12_mths  \\\n",
       "0      20701.0          1506.0     37.2                       0.0   \n",
       "1       9733.0         57830.0     27.1                       0.0   \n",
       "2      31617.0          2737.0     55.9                       0.0   \n",
       "3      23192.0         54962.0     12.1                       0.0   \n",
       "4      27644.0          4567.0     77.5                       0.0   \n",
       "\n",
       "   delinq_amnt  mo_sin_old_il_acct  mo_sin_old_rev_tl_op  \\\n",
       "0          0.0               148.0                 128.0   \n",
       "1          0.0               113.0                 192.0   \n",
       "2          0.0               125.0                 184.0   \n",
       "3          0.0                36.0                  87.0   \n",
       "4          0.0               128.0                 210.0   \n",
       "\n",
       "   mo_sin_rcnt_rev_tl_op  mo_sin_rcnt_tl  mort_acc  mths_since_recent_bc  \\\n",
       "0                    3.0             3.0       1.0                   4.0   \n",
       "1                    2.0             2.0       4.0                   2.0   \n",
       "2                   14.0            14.0       5.0                 101.0   \n",
       "3                    2.0             2.0       1.0                   2.0   \n",
       "4                    4.0             4.0       6.0                   4.0   \n",
       "\n",
       "   mths_since_recent_inq  num_accts_ever_120_pd  num_actv_bc_tl  \\\n",
       "0                    4.0                    2.0             2.0   \n",
       "1                    0.0                    0.0             5.0   \n",
       "2                   10.0                    0.0             2.0   \n",
       "3                    NaN                    0.0             4.0   \n",
       "4                    1.0                    0.0             4.0   \n",
       "\n",
       "   num_actv_rev_tl  num_bc_sats  num_bc_tl  num_il_tl  num_op_rev_tl  \\\n",
       "0              4.0          2.0        5.0        3.0            4.0   \n",
       "1              5.0         13.0       17.0        6.0           20.0   \n",
       "2              3.0          2.0        4.0        6.0            4.0   \n",
       "3              5.0          8.0       10.0        2.0           10.0   \n",
       "4              6.0          5.0        9.0       10.0            7.0   \n",
       "\n",
       "   num_rev_accts  num_rev_tl_bal_gt_0  num_sats  num_tl_120dpd_2m  \\\n",
       "0            9.0                  4.0       7.0               0.0   \n",
       "1           27.0                  5.0      22.0               0.0   \n",
       "2            7.0                  3.0       6.0               0.0   \n",
       "3           13.0                  5.0      13.0               0.0   \n",
       "4           19.0                  6.0      12.0               0.0   \n",
       "\n",
       "   num_tl_30dpd  num_tl_90g_dpd_24m  num_tl_op_past_12m  pct_tl_nvr_dlq  \\\n",
       "0           0.0                 0.0                 3.0            76.9   \n",
       "1           0.0                 0.0                 2.0            97.4   \n",
       "2           0.0                 0.0                 0.0           100.0   \n",
       "3           0.0                 0.0                 1.0           100.0   \n",
       "4           0.0                 0.0                 4.0            96.6   \n",
       "\n",
       "   percent_bc_gt_75  pub_rec_bankruptcies  tax_liens  tot_hi_cred_lim  \\\n",
       "0               0.0                   0.0        0.0         178050.0   \n",
       "1               7.7                   0.0        0.0         314017.0   \n",
       "2              50.0                   0.0        0.0         218418.0   \n",
       "3               0.0                   0.0        0.0         381215.0   \n",
       "4              60.0                   0.0        0.0         439570.0   \n",
       "\n",
       "   total_bal_ex_mort  total_bc_limit  total_il_high_credit_limit  \n",
       "0             7746.0          2400.0                     13734.0  \n",
       "1            39475.0         79300.0                     24667.0  \n",
       "2            18696.0          6200.0                     14877.0  \n",
       "3            52226.0         62500.0                     18000.0  \n",
       "4            95768.0         20300.0                     88097.0  "
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "51c85fe2",
   "metadata": {},
   "source": [
    "#### home_ownership"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "id": "45eac3c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MORTGAGE    1111450\n",
       "RENT         894929\n",
       "OWN          253057\n",
       "ANY             996\n",
       "OTHER           182\n",
       "NONE             54\n",
       "Name: home_ownership, dtype: int64"
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# home_ownership\n",
    "data['home_ownership'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "id": "a29385fb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['home_ownership'].isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "id": "3fa90547",
   "metadata": {},
   "outputs": [],
   "source": [
    "home_ownership_replace = {\n",
    "    \"home_ownership\": {\n",
    "        \"MORTGAGE\": \"a\",\n",
    "        \"RENT\": \"b\",\n",
    "        \"OWN\": \"c\",\n",
    "        \"ANY\": \"d\",\n",
    "        \"OTHER\": \"e\",\n",
    "        \"NONE\": \"f\",\n",
    "    }\n",
    "}\n",
    "data = data.replace(home_ownership_replace)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3772012a",
   "metadata": {},
   "source": [
    "#### verification_status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "id": "ad4ed74a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Source Verified    886231\n",
       "Not Verified       744806\n",
       "Verified           629631\n",
       "Name: verification_status, dtype: int64"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# verification_status\n",
    "data['verification_status'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "id": "e09fedaa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['verification_status'].isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "id": "0dd9f51f",
   "metadata": {},
   "outputs": [],
   "source": [
    "verification_status_replace = {\n",
    "    \"verification_status\": {\n",
    "        \"Source Verified\": \"a\",\n",
    "        \"Not Verified\": \"b\",\n",
    "        \"Verified\": \"c\",\n",
    "    }\n",
    "}\n",
    "data = data.replace(verification_status_replace)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b4ed68c5",
   "metadata": {},
   "source": [
    "#### loan_status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "id": "a657329e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Fully Paid                                             1076751\n",
       "Current                                                 878317\n",
       "Charged Off                                             268559\n",
       "Late (31-120 days)                                       21467\n",
       "In Grace Period                                           8436\n",
       "Late (16-30 days)                                         4349\n",
       "Does not meet the credit policy. Status:Fully Paid        1988\n",
       "Does not meet the credit policy. Status:Charged Off        761\n",
       "Default                                                     40\n",
       "Name: loan_status, dtype: int64"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# loan_status\n",
    "data['loan_status'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "id": "27f13623",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['loan_status'].isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "id": "7c57d2f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "loan_status_replace = {\n",
    "    \"loan_status\": {\n",
    "        \"Fully Paid\": \"a\",\n",
    "        \"Current\": \"b\",\n",
    "        \"Charged Off\": \"c\",\n",
    "        \"Late (31-120 days)\": \"d\",\n",
    "        \"In Grace Period\": \"e\",\n",
    "        \"Late (16-30 days)\": \"f\",\n",
    "        \"Does not meet the credit policy. Status:Fully Paid\": \"g\",\n",
    "        \"Does not meet the credit policy. Status:Charged Off\": \"h\",\n",
    "        \"Default\": \"i\"\n",
    "    }\n",
    "}\n",
    "data = data.replace(loan_status_replace)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0197152f",
   "metadata": {},
   "source": [
    "#### addr_state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fac26ff3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# addr_state\n",
    "data['addr_state'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "22920d17",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['addr_state'].isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b91d6de",
   "metadata": {},
   "source": [
    "#### 离散值one-hot编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "9fc848ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.get_dummies(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "5752dcc8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>fico_range_low</th>\n",
       "      <th>fico_range_high</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>revol_util</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>out_prncp</th>\n",
       "      <th>out_prncp_inv</th>\n",
       "      <th>total_pymnt</th>\n",
       "      <th>total_pymnt_inv</th>\n",
       "      <th>total_rec_prncp</th>\n",
       "      <th>total_rec_int</th>\n",
       "      <th>total_rec_late_fee</th>\n",
       "      <th>recoveries</th>\n",
       "      <th>collection_recovery_fee</th>\n",
       "      <th>last_pymnt_amnt</th>\n",
       "      <th>last_fico_range_high</th>\n",
       "      <th>last_fico_range_low</th>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <th>open_acc_6m</th>\n",
       "      <th>open_act_il</th>\n",
       "      <th>open_il_12m</th>\n",
       "      <th>open_il_24m</th>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <th>total_bal_il</th>\n",
       "      <th>il_util</th>\n",
       "      <th>open_rv_12m</th>\n",
       "      <th>open_rv_24m</th>\n",
       "      <th>max_bal_bc</th>\n",
       "      <th>all_util</th>\n",
       "      <th>total_rev_hi_lim</th>\n",
       "      <th>inq_fi</th>\n",
       "      <th>total_cu_tl</th>\n",
       "      <th>inq_last_12m</th>\n",
       "      <th>acc_open_past_24mths</th>\n",
       "      <th>avg_cur_bal</th>\n",
       "      <th>bc_open_to_buy</th>\n",
       "      <th>bc_util</th>\n",
       "      <th>chargeoff_within_12_mths</th>\n",
       "      <th>delinq_amnt</th>\n",
       "      <th>mo_sin_old_il_acct</th>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <th>mort_acc</th>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <th>num_bc_sats</th>\n",
       "      <th>num_bc_tl</th>\n",
       "      <th>num_il_tl</th>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <th>num_rev_accts</th>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <th>num_sats</th>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <th>tax_liens</th>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <th>total_bc_limit</th>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "      <th>home_ownership_a</th>\n",
       "      <th>home_ownership_b</th>\n",
       "      <th>home_ownership_c</th>\n",
       "      <th>home_ownership_d</th>\n",
       "      <th>home_ownership_e</th>\n",
       "      <th>home_ownership_f</th>\n",
       "      <th>verification_status_a</th>\n",
       "      <th>verification_status_b</th>\n",
       "      <th>verification_status_c</th>\n",
       "      <th>loan_status_a</th>\n",
       "      <th>loan_status_b</th>\n",
       "      <th>loan_status_c</th>\n",
       "      <th>loan_status_d</th>\n",
       "      <th>loan_status_e</th>\n",
       "      <th>loan_status_f</th>\n",
       "      <th>loan_status_g</th>\n",
       "      <th>loan_status_h</th>\n",
       "      <th>loan_status_i</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>13.99</td>\n",
       "      <td>123.03</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>5.91</td>\n",
       "      <td>0.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2765.0</td>\n",
       "      <td>29.7</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4421.723917</td>\n",
       "      <td>4421.72</td>\n",
       "      <td>3600.00</td>\n",
       "      <td>821.72</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>122.67</td>\n",
       "      <td>564.0</td>\n",
       "      <td>560.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>144904.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4981.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>9300.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20701.0</td>\n",
       "      <td>1506.0</td>\n",
       "      <td>37.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>76.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>178050.0</td>\n",
       "      <td>7746.0</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>13734.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>11.99</td>\n",
       "      <td>820.28</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>16.06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21470.0</td>\n",
       "      <td>19.2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>25679.660000</td>\n",
       "      <td>25679.66</td>\n",
       "      <td>24700.00</td>\n",
       "      <td>979.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>926.35</td>\n",
       "      <td>699.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>204396.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>18005.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6472.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>111800.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9733.0</td>\n",
       "      <td>57830.0</td>\n",
       "      <td>27.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>192.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>7.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>314017.0</td>\n",
       "      <td>39475.0</td>\n",
       "      <td>79300.0</td>\n",
       "      <td>24667.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>10.78</td>\n",
       "      <td>432.66</td>\n",
       "      <td>1</td>\n",
       "      <td>10.0</td>\n",
       "      <td>63000.0</td>\n",
       "      <td>10.78</td>\n",
       "      <td>0.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7869.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>22705.924294</td>\n",
       "      <td>22705.92</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>2705.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15813.30</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>189699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>10827.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2081.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>31617.0</td>\n",
       "      <td>2737.0</td>\n",
       "      <td>55.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>184.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>218418.0</td>\n",
       "      <td>18696.0</td>\n",
       "      <td>6200.0</td>\n",
       "      <td>14877.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>14.85</td>\n",
       "      <td>829.90</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>17.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>785.0</td>\n",
       "      <td>789.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7802.0</td>\n",
       "      <td>11.6</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>31464.010000</td>\n",
       "      <td>31464.01</td>\n",
       "      <td>19102.35</td>\n",
       "      <td>12361.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>829.90</td>\n",
       "      <td>679.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>301500.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>12609.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6987.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>67300.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>23192.0</td>\n",
       "      <td>54962.0</td>\n",
       "      <td>12.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>381215.0</td>\n",
       "      <td>52226.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>18000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>22.45</td>\n",
       "      <td>289.91</td>\n",
       "      <td>4</td>\n",
       "      <td>3.0</td>\n",
       "      <td>104433.0</td>\n",
       "      <td>25.37</td>\n",
       "      <td>1.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21929.0</td>\n",
       "      <td>64.5</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11740.500000</td>\n",
       "      <td>11740.50</td>\n",
       "      <td>10400.00</td>\n",
       "      <td>1340.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10128.96</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>331730.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>73839.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9702.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>34000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>27644.0</td>\n",
       "      <td>4567.0</td>\n",
       "      <td>77.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>439570.0</td>\n",
       "      <td>95768.0</td>\n",
       "      <td>20300.0</td>\n",
       "      <td>88097.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  funded_amnt  funded_amnt_inv  int_rate  installment  grade  \\\n",
       "0     3600.0       3600.0           3600.0     13.99       123.03      2   \n",
       "1    24700.0      24700.0          24700.0     11.99       820.28      2   \n",
       "2    20000.0      20000.0          20000.0     10.78       432.66      1   \n",
       "3    35000.0      35000.0          35000.0     14.85       829.90      2   \n",
       "4    10400.0      10400.0          10400.0     22.45       289.91      4   \n",
       "\n",
       "   emp_length  annual_inc    dti  delinq_2yrs  fico_range_low  \\\n",
       "0        10.0     55000.0   5.91          0.0           675.0   \n",
       "1        10.0     65000.0  16.06          1.0           715.0   \n",
       "2        10.0     63000.0  10.78          0.0           695.0   \n",
       "3        10.0    110000.0  17.06          0.0           785.0   \n",
       "4         3.0    104433.0  25.37          1.0           695.0   \n",
       "\n",
       "   fico_range_high  inq_last_6mths  open_acc  pub_rec  revol_bal  revol_util  \\\n",
       "0            679.0             1.0       7.0      0.0     2765.0        29.7   \n",
       "1            719.0             4.0      22.0      0.0    21470.0        19.2   \n",
       "2            699.0             0.0       6.0      0.0     7869.0        56.2   \n",
       "3            789.0             0.0      13.0      0.0     7802.0        11.6   \n",
       "4            699.0             3.0      12.0      0.0    21929.0        64.5   \n",
       "\n",
       "   total_acc  out_prncp  out_prncp_inv   total_pymnt  total_pymnt_inv  \\\n",
       "0       13.0       0.00           0.00   4421.723917          4421.72   \n",
       "1       38.0       0.00           0.00  25679.660000         25679.66   \n",
       "2       18.0       0.00           0.00  22705.924294         22705.92   \n",
       "3       17.0   15897.65       15897.65  31464.010000         31464.01   \n",
       "4       35.0       0.00           0.00  11740.500000         11740.50   \n",
       "\n",
       "   total_rec_prncp  total_rec_int  total_rec_late_fee  recoveries  \\\n",
       "0          3600.00         821.72                 0.0         0.0   \n",
       "1         24700.00         979.66                 0.0         0.0   \n",
       "2         20000.00        2705.92                 0.0         0.0   \n",
       "3         19102.35       12361.66                 0.0         0.0   \n",
       "4         10400.00        1340.50                 0.0         0.0   \n",
       "\n",
       "   collection_recovery_fee  last_pymnt_amnt  last_fico_range_high  \\\n",
       "0                      0.0           122.67                 564.0   \n",
       "1                      0.0           926.35                 699.0   \n",
       "2                      0.0         15813.30                 704.0   \n",
       "3                      0.0           829.90                 679.0   \n",
       "4                      0.0         10128.96                 704.0   \n",
       "\n",
       "   last_fico_range_low  collections_12_mths_ex_med  acc_now_delinq  \\\n",
       "0                560.0                         0.0             0.0   \n",
       "1                695.0                         0.0             0.0   \n",
       "2                700.0                         0.0             0.0   \n",
       "3                675.0                         0.0             0.0   \n",
       "4                700.0                         0.0             0.0   \n",
       "\n",
       "   tot_coll_amt  tot_cur_bal  open_acc_6m  open_act_il  open_il_12m  \\\n",
       "0         722.0     144904.0          2.0          2.0          0.0   \n",
       "1           0.0     204396.0          1.0          1.0          0.0   \n",
       "2           0.0     189699.0          0.0          1.0          0.0   \n",
       "3           0.0     301500.0          1.0          1.0          0.0   \n",
       "4           0.0     331730.0          1.0          3.0          0.0   \n",
       "\n",
       "   open_il_24m  mths_since_rcnt_il  total_bal_il  il_util  open_rv_12m  \\\n",
       "0          1.0                21.0        4981.0     36.0          3.0   \n",
       "1          1.0                19.0       18005.0     73.0          2.0   \n",
       "2          4.0                19.0       10827.0     73.0          0.0   \n",
       "3          1.0                23.0       12609.0     70.0          1.0   \n",
       "4          3.0                14.0       73839.0     84.0          4.0   \n",
       "\n",
       "   open_rv_24m  max_bal_bc  all_util  total_rev_hi_lim  inq_fi  total_cu_tl  \\\n",
       "0          3.0       722.0      34.0            9300.0     3.0          1.0   \n",
       "1          3.0      6472.0      29.0          111800.0     0.0          0.0   \n",
       "2          2.0      2081.0      65.0           14000.0     2.0          5.0   \n",
       "3          1.0      6987.0      45.0           67300.0     0.0          1.0   \n",
       "4          7.0      9702.0      78.0           34000.0     2.0          1.0   \n",
       "\n",
       "   inq_last_12m  acc_open_past_24mths  avg_cur_bal  bc_open_to_buy  bc_util  \\\n",
       "0           4.0                   4.0      20701.0          1506.0     37.2   \n",
       "1           6.0                   4.0       9733.0         57830.0     27.1   \n",
       "2           1.0                   6.0      31617.0          2737.0     55.9   \n",
       "3           0.0                   2.0      23192.0         54962.0     12.1   \n",
       "4           3.0                  10.0      27644.0          4567.0     77.5   \n",
       "\n",
       "   chargeoff_within_12_mths  delinq_amnt  mo_sin_old_il_acct  \\\n",
       "0                       0.0          0.0               148.0   \n",
       "1                       0.0          0.0               113.0   \n",
       "2                       0.0          0.0               125.0   \n",
       "3                       0.0          0.0                36.0   \n",
       "4                       0.0          0.0               128.0   \n",
       "\n",
       "   mo_sin_old_rev_tl_op  mo_sin_rcnt_rev_tl_op  mo_sin_rcnt_tl  mort_acc  \\\n",
       "0                 128.0                    3.0             3.0       1.0   \n",
       "1                 192.0                    2.0             2.0       4.0   \n",
       "2                 184.0                   14.0            14.0       5.0   \n",
       "3                  87.0                    2.0             2.0       1.0   \n",
       "4                 210.0                    4.0             4.0       6.0   \n",
       "\n",
       "   mths_since_recent_bc  mths_since_recent_inq  num_accts_ever_120_pd  \\\n",
       "0                   4.0                    4.0                    2.0   \n",
       "1                   2.0                    0.0                    0.0   \n",
       "2                 101.0                   10.0                    0.0   \n",
       "3                   2.0                    NaN                    0.0   \n",
       "4                   4.0                    1.0                    0.0   \n",
       "\n",
       "   num_actv_bc_tl  num_actv_rev_tl  num_bc_sats  num_bc_tl  num_il_tl  \\\n",
       "0             2.0              4.0          2.0        5.0        3.0   \n",
       "1             5.0              5.0         13.0       17.0        6.0   \n",
       "2             2.0              3.0          2.0        4.0        6.0   \n",
       "3             4.0              5.0          8.0       10.0        2.0   \n",
       "4             4.0              6.0          5.0        9.0       10.0   \n",
       "\n",
       "   num_op_rev_tl  num_rev_accts  num_rev_tl_bal_gt_0  num_sats  \\\n",
       "0            4.0            9.0                  4.0       7.0   \n",
       "1           20.0           27.0                  5.0      22.0   \n",
       "2            4.0            7.0                  3.0       6.0   \n",
       "3           10.0           13.0                  5.0      13.0   \n",
       "4            7.0           19.0                  6.0      12.0   \n",
       "\n",
       "   num_tl_120dpd_2m  num_tl_30dpd  num_tl_90g_dpd_24m  num_tl_op_past_12m  \\\n",
       "0               0.0           0.0                 0.0                 3.0   \n",
       "1               0.0           0.0                 0.0                 2.0   \n",
       "2               0.0           0.0                 0.0                 0.0   \n",
       "3               0.0           0.0                 0.0                 1.0   \n",
       "4               0.0           0.0                 0.0                 4.0   \n",
       "\n",
       "   pct_tl_nvr_dlq  percent_bc_gt_75  pub_rec_bankruptcies  tax_liens  \\\n",
       "0            76.9               0.0                   0.0        0.0   \n",
       "1            97.4               7.7                   0.0        0.0   \n",
       "2           100.0              50.0                   0.0        0.0   \n",
       "3           100.0               0.0                   0.0        0.0   \n",
       "4            96.6              60.0                   0.0        0.0   \n",
       "\n",
       "   tot_hi_cred_lim  total_bal_ex_mort  total_bc_limit  \\\n",
       "0         178050.0             7746.0          2400.0   \n",
       "1         314017.0            39475.0         79300.0   \n",
       "2         218418.0            18696.0          6200.0   \n",
       "3         381215.0            52226.0         62500.0   \n",
       "4         439570.0            95768.0         20300.0   \n",
       "\n",
       "   total_il_high_credit_limit  home_ownership_a  home_ownership_b  \\\n",
       "0                     13734.0                 1                 0   \n",
       "1                     24667.0                 1                 0   \n",
       "2                     14877.0                 1                 0   \n",
       "3                     18000.0                 1                 0   \n",
       "4                     88097.0                 1                 0   \n",
       "\n",
       "   home_ownership_c  home_ownership_d  home_ownership_e  home_ownership_f  \\\n",
       "0                 0                 0                 0                 0   \n",
       "1                 0                 0                 0                 0   \n",
       "2                 0                 0                 0                 0   \n",
       "3                 0                 0                 0                 0   \n",
       "4                 0                 0                 0                 0   \n",
       "\n",
       "   verification_status_a  verification_status_b  verification_status_c  \\\n",
       "0                      0                      1                      0   \n",
       "1                      0                      1                      0   \n",
       "2                      0                      1                      0   \n",
       "3                      1                      0                      0   \n",
       "4                      1                      0                      0   \n",
       "\n",
       "   loan_status_a  loan_status_b  loan_status_c  loan_status_d  loan_status_e  \\\n",
       "0              1              0              0              0              0   \n",
       "1              1              0              0              0              0   \n",
       "2              1              0              0              0              0   \n",
       "3              0              1              0              0              0   \n",
       "4              1              0              0              0              0   \n",
       "\n",
       "   loan_status_f  loan_status_g  loan_status_h  loan_status_i  \n",
       "0              0              0              0              0  \n",
       "1              0              0              0              0  \n",
       "2              0              0              0              0  \n",
       "3              0              0              0              0  \n",
       "4              0              0              0              0  "
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "070f4310",
   "metadata": {},
   "source": [
    "### 缺失值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "d5d7eb91",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loan_amnt                           0\n",
       "funded_amnt                         0\n",
       "funded_amnt_inv                     0\n",
       "int_rate                            0\n",
       "installment                         0\n",
       "grade                               0\n",
       "emp_length                          0\n",
       "annual_inc                          4\n",
       "dti                              1711\n",
       "delinq_2yrs                        29\n",
       "fico_range_low                      0\n",
       "fico_range_high                     0\n",
       "inq_last_6mths                     30\n",
       "open_acc                           29\n",
       "pub_rec                            29\n",
       "revol_bal                           0\n",
       "revol_util                       1802\n",
       "total_acc                          29\n",
       "out_prncp                           0\n",
       "out_prncp_inv                       0\n",
       "total_pymnt                         0\n",
       "total_pymnt_inv                     0\n",
       "total_rec_prncp                     0\n",
       "total_rec_int                       0\n",
       "total_rec_late_fee                  0\n",
       "recoveries                          0\n",
       "collection_recovery_fee             0\n",
       "last_pymnt_amnt                     0\n",
       "last_fico_range_high                0\n",
       "last_fico_range_low                 0\n",
       "collections_12_mths_ex_med        145\n",
       "acc_now_delinq                     29\n",
       "tot_coll_amt                    70276\n",
       "tot_cur_bal                     70276\n",
       "open_acc_6m                    866130\n",
       "open_act_il                    866129\n",
       "open_il_12m                    866129\n",
       "open_il_24m                    866129\n",
       "mths_since_rcnt_il             909924\n",
       "total_bal_il                   866129\n",
       "il_util                       1068850\n",
       "open_rv_12m                    866129\n",
       "open_rv_24m                    866129\n",
       "max_bal_bc                     866129\n",
       "all_util                       866348\n",
       "total_rev_hi_lim                70276\n",
       "inq_fi                         866129\n",
       "total_cu_tl                    866130\n",
       "inq_last_12m                   866130\n",
       "acc_open_past_24mths            50030\n",
       "avg_cur_bal                     70346\n",
       "bc_open_to_buy                  74935\n",
       "bc_util                         76071\n",
       "chargeoff_within_12_mths          145\n",
       "delinq_amnt                        29\n",
       "mo_sin_old_il_acct             139071\n",
       "mo_sin_old_rev_tl_op            70277\n",
       "mo_sin_rcnt_rev_tl_op           70277\n",
       "mo_sin_rcnt_tl                  70276\n",
       "mort_acc                        50030\n",
       "mths_since_recent_bc            73412\n",
       "mths_since_recent_inq          295435\n",
       "num_accts_ever_120_pd           70276\n",
       "num_actv_bc_tl                  70276\n",
       "num_actv_rev_tl                 70276\n",
       "num_bc_sats                     58590\n",
       "num_bc_tl                       70276\n",
       "num_il_tl                       70276\n",
       "num_op_rev_tl                   70276\n",
       "num_rev_accts                   70277\n",
       "num_rev_tl_bal_gt_0             70276\n",
       "num_sats                        58590\n",
       "num_tl_120dpd_2m               153657\n",
       "num_tl_30dpd                    70276\n",
       "num_tl_90g_dpd_24m              70276\n",
       "num_tl_op_past_12m              70276\n",
       "pct_tl_nvr_dlq                  70431\n",
       "percent_bc_gt_75                75379\n",
       "pub_rec_bankruptcies             1365\n",
       "tax_liens                         105\n",
       "tot_hi_cred_lim                 70276\n",
       "total_bal_ex_mort               50030\n",
       "total_bc_limit                  50030\n",
       "total_il_high_credit_limit      70276\n",
       "home_ownership_a                    0\n",
       "home_ownership_b                    0\n",
       "home_ownership_c                    0\n",
       "home_ownership_d                    0\n",
       "home_ownership_e                    0\n",
       "home_ownership_f                    0\n",
       "verification_status_a               0\n",
       "verification_status_b               0\n",
       "verification_status_c               0\n",
       "loan_status_a                       0\n",
       "loan_status_b                       0\n",
       "loan_status_c                       0\n",
       "loan_status_d                       0\n",
       "loan_status_e                       0\n",
       "loan_status_f                       0\n",
       "loan_status_g                       0\n",
       "loan_status_h                       0\n",
       "loan_status_i                       0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "a450aafe",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.fillna(data.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "37bfd424",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>fico_range_low</th>\n",
       "      <th>fico_range_high</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>revol_util</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>out_prncp</th>\n",
       "      <th>out_prncp_inv</th>\n",
       "      <th>total_pymnt</th>\n",
       "      <th>total_pymnt_inv</th>\n",
       "      <th>total_rec_prncp</th>\n",
       "      <th>total_rec_int</th>\n",
       "      <th>total_rec_late_fee</th>\n",
       "      <th>recoveries</th>\n",
       "      <th>collection_recovery_fee</th>\n",
       "      <th>last_pymnt_amnt</th>\n",
       "      <th>last_fico_range_high</th>\n",
       "      <th>last_fico_range_low</th>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <th>open_acc_6m</th>\n",
       "      <th>open_act_il</th>\n",
       "      <th>open_il_12m</th>\n",
       "      <th>open_il_24m</th>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <th>total_bal_il</th>\n",
       "      <th>il_util</th>\n",
       "      <th>open_rv_12m</th>\n",
       "      <th>open_rv_24m</th>\n",
       "      <th>max_bal_bc</th>\n",
       "      <th>all_util</th>\n",
       "      <th>total_rev_hi_lim</th>\n",
       "      <th>inq_fi</th>\n",
       "      <th>total_cu_tl</th>\n",
       "      <th>inq_last_12m</th>\n",
       "      <th>acc_open_past_24mths</th>\n",
       "      <th>avg_cur_bal</th>\n",
       "      <th>bc_open_to_buy</th>\n",
       "      <th>bc_util</th>\n",
       "      <th>chargeoff_within_12_mths</th>\n",
       "      <th>delinq_amnt</th>\n",
       "      <th>mo_sin_old_il_acct</th>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <th>mort_acc</th>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <th>num_bc_sats</th>\n",
       "      <th>num_bc_tl</th>\n",
       "      <th>num_il_tl</th>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <th>num_rev_accts</th>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <th>num_sats</th>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <th>tax_liens</th>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <th>total_bc_limit</th>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "      <th>home_ownership_a</th>\n",
       "      <th>home_ownership_b</th>\n",
       "      <th>home_ownership_c</th>\n",
       "      <th>home_ownership_d</th>\n",
       "      <th>home_ownership_e</th>\n",
       "      <th>home_ownership_f</th>\n",
       "      <th>verification_status_a</th>\n",
       "      <th>verification_status_b</th>\n",
       "      <th>verification_status_c</th>\n",
       "      <th>loan_status_a</th>\n",
       "      <th>loan_status_b</th>\n",
       "      <th>loan_status_c</th>\n",
       "      <th>loan_status_d</th>\n",
       "      <th>loan_status_e</th>\n",
       "      <th>loan_status_f</th>\n",
       "      <th>loan_status_g</th>\n",
       "      <th>loan_status_h</th>\n",
       "      <th>loan_status_i</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>3600.0</td>\n",
       "      <td>13.99</td>\n",
       "      <td>123.03</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>5.91</td>\n",
       "      <td>0.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2765.0</td>\n",
       "      <td>29.7</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4421.723917</td>\n",
       "      <td>4421.72</td>\n",
       "      <td>3600.00</td>\n",
       "      <td>821.72</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>122.67</td>\n",
       "      <td>564.0</td>\n",
       "      <td>560.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>144904.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4981.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>722.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>9300.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20701.0</td>\n",
       "      <td>1506.0</td>\n",
       "      <td>37.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>76.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>178050.0</td>\n",
       "      <td>7746.0</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>13734.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>24700.0</td>\n",
       "      <td>11.99</td>\n",
       "      <td>820.28</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>16.06</td>\n",
       "      <td>1.0</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21470.0</td>\n",
       "      <td>19.2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>25679.660000</td>\n",
       "      <td>25679.66</td>\n",
       "      <td>24700.00</td>\n",
       "      <td>979.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>926.35</td>\n",
       "      <td>699.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>204396.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>18005.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6472.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>111800.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9733.0</td>\n",
       "      <td>57830.0</td>\n",
       "      <td>27.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>192.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>7.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>314017.0</td>\n",
       "      <td>39475.0</td>\n",
       "      <td>79300.0</td>\n",
       "      <td>24667.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>10.78</td>\n",
       "      <td>432.66</td>\n",
       "      <td>1</td>\n",
       "      <td>10.0</td>\n",
       "      <td>63000.0</td>\n",
       "      <td>10.78</td>\n",
       "      <td>0.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7869.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>22705.924294</td>\n",
       "      <td>22705.92</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>2705.92</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15813.30</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>189699.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>10827.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2081.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>31617.0</td>\n",
       "      <td>2737.0</td>\n",
       "      <td>55.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>184.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>218418.0</td>\n",
       "      <td>18696.0</td>\n",
       "      <td>6200.0</td>\n",
       "      <td>14877.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>14.85</td>\n",
       "      <td>829.90</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>17.06</td>\n",
       "      <td>0.0</td>\n",
       "      <td>785.0</td>\n",
       "      <td>789.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7802.0</td>\n",
       "      <td>11.6</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>15897.65</td>\n",
       "      <td>31464.010000</td>\n",
       "      <td>31464.01</td>\n",
       "      <td>19102.35</td>\n",
       "      <td>12361.66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>829.90</td>\n",
       "      <td>679.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>301500.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>12609.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6987.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>67300.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>23192.0</td>\n",
       "      <td>54962.0</td>\n",
       "      <td>12.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.024194</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>381215.0</td>\n",
       "      <td>52226.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>18000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>10400.0</td>\n",
       "      <td>22.45</td>\n",
       "      <td>289.91</td>\n",
       "      <td>4</td>\n",
       "      <td>3.0</td>\n",
       "      <td>104433.0</td>\n",
       "      <td>25.37</td>\n",
       "      <td>1.0</td>\n",
       "      <td>695.0</td>\n",
       "      <td>699.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21929.0</td>\n",
       "      <td>64.5</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11740.500000</td>\n",
       "      <td>11740.50</td>\n",
       "      <td>10400.00</td>\n",
       "      <td>1340.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10128.96</td>\n",
       "      <td>704.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>331730.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>73839.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9702.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>34000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>27644.0</td>\n",
       "      <td>4567.0</td>\n",
       "      <td>77.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>439570.0</td>\n",
       "      <td>95768.0</td>\n",
       "      <td>20300.0</td>\n",
       "      <td>88097.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  funded_amnt  funded_amnt_inv  int_rate  installment  grade  \\\n",
       "0     3600.0       3600.0           3600.0     13.99       123.03      2   \n",
       "1    24700.0      24700.0          24700.0     11.99       820.28      2   \n",
       "2    20000.0      20000.0          20000.0     10.78       432.66      1   \n",
       "3    35000.0      35000.0          35000.0     14.85       829.90      2   \n",
       "4    10400.0      10400.0          10400.0     22.45       289.91      4   \n",
       "\n",
       "   emp_length  annual_inc    dti  delinq_2yrs  fico_range_low  \\\n",
       "0        10.0     55000.0   5.91          0.0           675.0   \n",
       "1        10.0     65000.0  16.06          1.0           715.0   \n",
       "2        10.0     63000.0  10.78          0.0           695.0   \n",
       "3        10.0    110000.0  17.06          0.0           785.0   \n",
       "4         3.0    104433.0  25.37          1.0           695.0   \n",
       "\n",
       "   fico_range_high  inq_last_6mths  open_acc  pub_rec  revol_bal  revol_util  \\\n",
       "0            679.0             1.0       7.0      0.0     2765.0        29.7   \n",
       "1            719.0             4.0      22.0      0.0    21470.0        19.2   \n",
       "2            699.0             0.0       6.0      0.0     7869.0        56.2   \n",
       "3            789.0             0.0      13.0      0.0     7802.0        11.6   \n",
       "4            699.0             3.0      12.0      0.0    21929.0        64.5   \n",
       "\n",
       "   total_acc  out_prncp  out_prncp_inv   total_pymnt  total_pymnt_inv  \\\n",
       "0       13.0       0.00           0.00   4421.723917          4421.72   \n",
       "1       38.0       0.00           0.00  25679.660000         25679.66   \n",
       "2       18.0       0.00           0.00  22705.924294         22705.92   \n",
       "3       17.0   15897.65       15897.65  31464.010000         31464.01   \n",
       "4       35.0       0.00           0.00  11740.500000         11740.50   \n",
       "\n",
       "   total_rec_prncp  total_rec_int  total_rec_late_fee  recoveries  \\\n",
       "0          3600.00         821.72                 0.0         0.0   \n",
       "1         24700.00         979.66                 0.0         0.0   \n",
       "2         20000.00        2705.92                 0.0         0.0   \n",
       "3         19102.35       12361.66                 0.0         0.0   \n",
       "4         10400.00        1340.50                 0.0         0.0   \n",
       "\n",
       "   collection_recovery_fee  last_pymnt_amnt  last_fico_range_high  \\\n",
       "0                      0.0           122.67                 564.0   \n",
       "1                      0.0           926.35                 699.0   \n",
       "2                      0.0         15813.30                 704.0   \n",
       "3                      0.0           829.90                 679.0   \n",
       "4                      0.0         10128.96                 704.0   \n",
       "\n",
       "   last_fico_range_low  collections_12_mths_ex_med  acc_now_delinq  \\\n",
       "0                560.0                         0.0             0.0   \n",
       "1                695.0                         0.0             0.0   \n",
       "2                700.0                         0.0             0.0   \n",
       "3                675.0                         0.0             0.0   \n",
       "4                700.0                         0.0             0.0   \n",
       "\n",
       "   tot_coll_amt  tot_cur_bal  open_acc_6m  open_act_il  open_il_12m  \\\n",
       "0         722.0     144904.0          2.0          2.0          0.0   \n",
       "1           0.0     204396.0          1.0          1.0          0.0   \n",
       "2           0.0     189699.0          0.0          1.0          0.0   \n",
       "3           0.0     301500.0          1.0          1.0          0.0   \n",
       "4           0.0     331730.0          1.0          3.0          0.0   \n",
       "\n",
       "   open_il_24m  mths_since_rcnt_il  total_bal_il  il_util  open_rv_12m  \\\n",
       "0          1.0                21.0        4981.0     36.0          3.0   \n",
       "1          1.0                19.0       18005.0     73.0          2.0   \n",
       "2          4.0                19.0       10827.0     73.0          0.0   \n",
       "3          1.0                23.0       12609.0     70.0          1.0   \n",
       "4          3.0                14.0       73839.0     84.0          4.0   \n",
       "\n",
       "   open_rv_24m  max_bal_bc  all_util  total_rev_hi_lim  inq_fi  total_cu_tl  \\\n",
       "0          3.0       722.0      34.0            9300.0     3.0          1.0   \n",
       "1          3.0      6472.0      29.0          111800.0     0.0          0.0   \n",
       "2          2.0      2081.0      65.0           14000.0     2.0          5.0   \n",
       "3          1.0      6987.0      45.0           67300.0     0.0          1.0   \n",
       "4          7.0      9702.0      78.0           34000.0     2.0          1.0   \n",
       "\n",
       "   inq_last_12m  acc_open_past_24mths  avg_cur_bal  bc_open_to_buy  bc_util  \\\n",
       "0           4.0                   4.0      20701.0          1506.0     37.2   \n",
       "1           6.0                   4.0       9733.0         57830.0     27.1   \n",
       "2           1.0                   6.0      31617.0          2737.0     55.9   \n",
       "3           0.0                   2.0      23192.0         54962.0     12.1   \n",
       "4           3.0                  10.0      27644.0          4567.0     77.5   \n",
       "\n",
       "   chargeoff_within_12_mths  delinq_amnt  mo_sin_old_il_acct  \\\n",
       "0                       0.0          0.0               148.0   \n",
       "1                       0.0          0.0               113.0   \n",
       "2                       0.0          0.0               125.0   \n",
       "3                       0.0          0.0                36.0   \n",
       "4                       0.0          0.0               128.0   \n",
       "\n",
       "   mo_sin_old_rev_tl_op  mo_sin_rcnt_rev_tl_op  mo_sin_rcnt_tl  mort_acc  \\\n",
       "0                 128.0                    3.0             3.0       1.0   \n",
       "1                 192.0                    2.0             2.0       4.0   \n",
       "2                 184.0                   14.0            14.0       5.0   \n",
       "3                  87.0                    2.0             2.0       1.0   \n",
       "4                 210.0                    4.0             4.0       6.0   \n",
       "\n",
       "   mths_since_recent_bc  mths_since_recent_inq  num_accts_ever_120_pd  \\\n",
       "0                   4.0               4.000000                    2.0   \n",
       "1                   2.0               0.000000                    0.0   \n",
       "2                 101.0              10.000000                    0.0   \n",
       "3                   2.0               7.024194                    0.0   \n",
       "4                   4.0               1.000000                    0.0   \n",
       "\n",
       "   num_actv_bc_tl  num_actv_rev_tl  num_bc_sats  num_bc_tl  num_il_tl  \\\n",
       "0             2.0              4.0          2.0        5.0        3.0   \n",
       "1             5.0              5.0         13.0       17.0        6.0   \n",
       "2             2.0              3.0          2.0        4.0        6.0   \n",
       "3             4.0              5.0          8.0       10.0        2.0   \n",
       "4             4.0              6.0          5.0        9.0       10.0   \n",
       "\n",
       "   num_op_rev_tl  num_rev_accts  num_rev_tl_bal_gt_0  num_sats  \\\n",
       "0            4.0            9.0                  4.0       7.0   \n",
       "1           20.0           27.0                  5.0      22.0   \n",
       "2            4.0            7.0                  3.0       6.0   \n",
       "3           10.0           13.0                  5.0      13.0   \n",
       "4            7.0           19.0                  6.0      12.0   \n",
       "\n",
       "   num_tl_120dpd_2m  num_tl_30dpd  num_tl_90g_dpd_24m  num_tl_op_past_12m  \\\n",
       "0               0.0           0.0                 0.0                 3.0   \n",
       "1               0.0           0.0                 0.0                 2.0   \n",
       "2               0.0           0.0                 0.0                 0.0   \n",
       "3               0.0           0.0                 0.0                 1.0   \n",
       "4               0.0           0.0                 0.0                 4.0   \n",
       "\n",
       "   pct_tl_nvr_dlq  percent_bc_gt_75  pub_rec_bankruptcies  tax_liens  \\\n",
       "0            76.9               0.0                   0.0        0.0   \n",
       "1            97.4               7.7                   0.0        0.0   \n",
       "2           100.0              50.0                   0.0        0.0   \n",
       "3           100.0               0.0                   0.0        0.0   \n",
       "4            96.6              60.0                   0.0        0.0   \n",
       "\n",
       "   tot_hi_cred_lim  total_bal_ex_mort  total_bc_limit  \\\n",
       "0         178050.0             7746.0          2400.0   \n",
       "1         314017.0            39475.0         79300.0   \n",
       "2         218418.0            18696.0          6200.0   \n",
       "3         381215.0            52226.0         62500.0   \n",
       "4         439570.0            95768.0         20300.0   \n",
       "\n",
       "   total_il_high_credit_limit  home_ownership_a  home_ownership_b  \\\n",
       "0                     13734.0                 1                 0   \n",
       "1                     24667.0                 1                 0   \n",
       "2                     14877.0                 1                 0   \n",
       "3                     18000.0                 1                 0   \n",
       "4                     88097.0                 1                 0   \n",
       "\n",
       "   home_ownership_c  home_ownership_d  home_ownership_e  home_ownership_f  \\\n",
       "0                 0                 0                 0                 0   \n",
       "1                 0                 0                 0                 0   \n",
       "2                 0                 0                 0                 0   \n",
       "3                 0                 0                 0                 0   \n",
       "4                 0                 0                 0                 0   \n",
       "\n",
       "   verification_status_a  verification_status_b  verification_status_c  \\\n",
       "0                      0                      1                      0   \n",
       "1                      0                      1                      0   \n",
       "2                      0                      1                      0   \n",
       "3                      1                      0                      0   \n",
       "4                      1                      0                      0   \n",
       "\n",
       "   loan_status_a  loan_status_b  loan_status_c  loan_status_d  loan_status_e  \\\n",
       "0              1              0              0              0              0   \n",
       "1              1              0              0              0              0   \n",
       "2              1              0              0              0              0   \n",
       "3              0              1              0              0              0   \n",
       "4              1              0              0              0              0   \n",
       "\n",
       "   loan_status_f  loan_status_g  loan_status_h  loan_status_i  \n",
       "0              0              0              0              0  \n",
       "1              0              0              0              0  \n",
       "2              0              0              0              0  \n",
       "3              0              0              0              0  \n",
       "4              0              0              0              0  "
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "fa3e4ce9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loan_amnt                     0\n",
       "funded_amnt                   0\n",
       "funded_amnt_inv               0\n",
       "int_rate                      0\n",
       "installment                   0\n",
       "grade                         0\n",
       "emp_length                    0\n",
       "annual_inc                    0\n",
       "dti                           0\n",
       "delinq_2yrs                   0\n",
       "fico_range_low                0\n",
       "fico_range_high               0\n",
       "inq_last_6mths                0\n",
       "open_acc                      0\n",
       "pub_rec                       0\n",
       "revol_bal                     0\n",
       "revol_util                    0\n",
       "total_acc                     0\n",
       "out_prncp                     0\n",
       "out_prncp_inv                 0\n",
       "total_pymnt                   0\n",
       "total_pymnt_inv               0\n",
       "total_rec_prncp               0\n",
       "total_rec_int                 0\n",
       "total_rec_late_fee            0\n",
       "recoveries                    0\n",
       "collection_recovery_fee       0\n",
       "last_pymnt_amnt               0\n",
       "last_fico_range_high          0\n",
       "last_fico_range_low           0\n",
       "collections_12_mths_ex_med    0\n",
       "acc_now_delinq                0\n",
       "tot_coll_amt                  0\n",
       "tot_cur_bal                   0\n",
       "open_acc_6m                   0\n",
       "open_act_il                   0\n",
       "open_il_12m                   0\n",
       "open_il_24m                   0\n",
       "mths_since_rcnt_il            0\n",
       "total_bal_il                  0\n",
       "il_util                       0\n",
       "open_rv_12m                   0\n",
       "open_rv_24m                   0\n",
       "max_bal_bc                    0\n",
       "all_util                      0\n",
       "total_rev_hi_lim              0\n",
       "inq_fi                        0\n",
       "total_cu_tl                   0\n",
       "inq_last_12m                  0\n",
       "acc_open_past_24mths          0\n",
       "avg_cur_bal                   0\n",
       "bc_open_to_buy                0\n",
       "bc_util                       0\n",
       "chargeoff_within_12_mths      0\n",
       "delinq_amnt                   0\n",
       "mo_sin_old_il_acct            0\n",
       "mo_sin_old_rev_tl_op          0\n",
       "mo_sin_rcnt_rev_tl_op         0\n",
       "mo_sin_rcnt_tl                0\n",
       "mort_acc                      0\n",
       "mths_since_recent_bc          0\n",
       "mths_since_recent_inq         0\n",
       "num_accts_ever_120_pd         0\n",
       "num_actv_bc_tl                0\n",
       "num_actv_rev_tl               0\n",
       "num_bc_sats                   0\n",
       "num_bc_tl                     0\n",
       "num_il_tl                     0\n",
       "num_op_rev_tl                 0\n",
       "num_rev_accts                 0\n",
       "num_rev_tl_bal_gt_0           0\n",
       "num_sats                      0\n",
       "num_tl_120dpd_2m              0\n",
       "num_tl_30dpd                  0\n",
       "num_tl_90g_dpd_24m            0\n",
       "num_tl_op_past_12m            0\n",
       "pct_tl_nvr_dlq                0\n",
       "percent_bc_gt_75              0\n",
       "pub_rec_bankruptcies          0\n",
       "tax_liens                     0\n",
       "tot_hi_cred_lim               0\n",
       "total_bal_ex_mort             0\n",
       "total_bc_limit                0\n",
       "total_il_high_credit_limit    0\n",
       "home_ownership_a              0\n",
       "home_ownership_b              0\n",
       "home_ownership_c              0\n",
       "home_ownership_d              0\n",
       "home_ownership_e              0\n",
       "home_ownership_f              0\n",
       "verification_status_a         0\n",
       "verification_status_b         0\n",
       "verification_status_c         0\n",
       "loan_status_a                 0\n",
       "loan_status_b                 0\n",
       "loan_status_c                 0\n",
       "loan_status_d                 0\n",
       "loan_status_e                 0\n",
       "loan_status_f                 0\n",
       "loan_status_g                 0\n",
       "loan_status_h                 0\n",
       "loan_status_i                 0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9719b1c",
   "metadata": {},
   "source": [
    "## 模型预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "d999a33b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['loan_amnt',\n",
       " 'funded_amnt',\n",
       " 'funded_amnt_inv',\n",
       " 'int_rate',\n",
       " 'installment',\n",
       " 'emp_length',\n",
       " 'annual_inc',\n",
       " 'dti',\n",
       " 'delinq_2yrs',\n",
       " 'fico_range_low',\n",
       " 'fico_range_high',\n",
       " 'inq_last_6mths',\n",
       " 'open_acc',\n",
       " 'pub_rec',\n",
       " 'revol_bal',\n",
       " 'revol_util',\n",
       " 'total_acc',\n",
       " 'out_prncp',\n",
       " 'out_prncp_inv',\n",
       " 'total_pymnt',\n",
       " 'total_pymnt_inv',\n",
       " 'total_rec_prncp',\n",
       " 'total_rec_int',\n",
       " 'total_rec_late_fee',\n",
       " 'recoveries',\n",
       " 'collection_recovery_fee',\n",
       " 'last_pymnt_amnt',\n",
       " 'last_fico_range_high',\n",
       " 'last_fico_range_low',\n",
       " 'collections_12_mths_ex_med',\n",
       " 'acc_now_delinq',\n",
       " 'tot_coll_amt',\n",
       " 'tot_cur_bal',\n",
       " 'open_acc_6m',\n",
       " 'open_act_il',\n",
       " 'open_il_12m',\n",
       " 'open_il_24m',\n",
       " 'mths_since_rcnt_il',\n",
       " 'total_bal_il',\n",
       " 'il_util',\n",
       " 'open_rv_12m',\n",
       " 'open_rv_24m',\n",
       " 'max_bal_bc',\n",
       " 'all_util',\n",
       " 'total_rev_hi_lim',\n",
       " 'inq_fi',\n",
       " 'total_cu_tl',\n",
       " 'inq_last_12m',\n",
       " 'acc_open_past_24mths',\n",
       " 'avg_cur_bal',\n",
       " 'bc_open_to_buy',\n",
       " 'bc_util',\n",
       " 'chargeoff_within_12_mths',\n",
       " 'delinq_amnt',\n",
       " 'mo_sin_old_il_acct',\n",
       " 'mo_sin_old_rev_tl_op',\n",
       " 'mo_sin_rcnt_rev_tl_op',\n",
       " 'mo_sin_rcnt_tl',\n",
       " 'mort_acc',\n",
       " 'mths_since_recent_bc',\n",
       " 'mths_since_recent_inq',\n",
       " 'num_accts_ever_120_pd',\n",
       " 'num_actv_bc_tl',\n",
       " 'num_actv_rev_tl',\n",
       " 'num_bc_sats',\n",
       " 'num_bc_tl',\n",
       " 'num_il_tl',\n",
       " 'num_op_rev_tl',\n",
       " 'num_rev_accts',\n",
       " 'num_rev_tl_bal_gt_0',\n",
       " 'num_sats',\n",
       " 'num_tl_120dpd_2m',\n",
       " 'num_tl_30dpd',\n",
       " 'num_tl_90g_dpd_24m',\n",
       " 'num_tl_op_past_12m',\n",
       " 'pct_tl_nvr_dlq',\n",
       " 'percent_bc_gt_75',\n",
       " 'pub_rec_bankruptcies',\n",
       " 'tax_liens',\n",
       " 'tot_hi_cred_lim',\n",
       " 'total_bal_ex_mort',\n",
       " 'total_bc_limit',\n",
       " 'total_il_high_credit_limit',\n",
       " 'home_ownership_a',\n",
       " 'home_ownership_b',\n",
       " 'home_ownership_c',\n",
       " 'home_ownership_d',\n",
       " 'home_ownership_e',\n",
       " 'home_ownership_f',\n",
       " 'verification_status_a',\n",
       " 'verification_status_b',\n",
       " 'verification_status_c',\n",
       " 'loan_status_a',\n",
       " 'loan_status_b',\n",
       " 'loan_status_c',\n",
       " 'loan_status_d',\n",
       " 'loan_status_e',\n",
       " 'loan_status_f',\n",
       " 'loan_status_g',\n",
       " 'loan_status_h',\n",
       " 'loan_status_i']"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_cols = data.columns.tolist()\n",
    "feature_cols.remove('grade')\n",
    "feature_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "id": "bbf3a5ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = data[feature_cols]\n",
    "y = data['grade']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "47a69391",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "465753d5",
   "metadata": {},
   "source": [
    "### BernoulliNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "id": "787767f8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.32447460266204264"
      ]
     },
     "execution_count": 169,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.naive_bayes import BernoulliNB\n",
    "bnb = BernoulliNB().fit(X_train, y_train)\n",
    "bnb_y_predict = bnb.predict(X_test)\n",
    "accuracy_score(y_test, bnb_y_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1c804e6",
   "metadata": {},
   "source": [
    "### GaussianNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "id": "c4ba4fc3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4068439887290051"
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.naive_bayes import GaussianNB\n",
    "gnb = GaussianNB().fit(X_train, y_train)\n",
    "gnb_y_predict = gnb.predict(X_test)\n",
    "accuracy_score(y_test, gnb_y_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a29ec606",
   "metadata": {},
   "source": [
    "### DecisionTreeClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "id": "6366b351",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9792406675896969"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "dtc = DecisionTreeClassifier().fit(X_train, y_train)\n",
    "dtc_y_predict = dtc.predict(X_test)\n",
    "accuracy_score(y_test, dtc_y_predict)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
